GIT ee42865f70f77e46cd316b795c1d14f7c10b0e9f git+ssh://master.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6.24.git commit ee42865f70f77e46cd316b795c1d14f7c10b0e9f Author: David S. Miller Date: Thu Aug 30 22:20:32 2007 -0700 [WIRELESS] rtl8187: Remove reference to IEEE80211_HW_DATA_NULLFUNC_ACK. It has been deleted. Signed-off-by: David S. Miller commit 290aeb9488e9f4caf0aa0b27b0bbc2acc19e9062 Author: David S. Miller Date: Wed Aug 29 20:34:16 2007 -0700 [FORCEDETH]: Fix build failures with NAPI disabled. Based upon a report from Joe Perches. Signed-off-by: David S. Miller commit 6c38cf8b1d0064c1342c64606fe44dac57f866cc Author: David S. Miller Date: Tue Aug 28 17:58:44 2007 -0700 [TG3/BNX2]: Revert unintentional TX locking changes. Noticed by Michael Chan. Signed-off-by: David S. Miller commit 8718b305f2f2a26142da80433e8c98b23b2dc3bf Author: Johannes Berg Date: Tue Aug 28 17:01:55 2007 -0400 [MAC80211]: make assoc_ap a flag The sta_info.assoc_ap value is used as a flag, move it into flags. Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit 78420a281ae00e98ae99609b4a775ef5ee1b251b Author: Johannes Berg Date: Tue Aug 28 17:01:55 2007 -0400 [MAC80211]: remove hostapd interface stuff This removes some definitions that are used only within ioctls that will never make it into mainline. Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit 22738e856d6bbe16fbd18ff759dc569f0ed765ec Author: Johannes Berg Date: Tue Aug 28 17:01:55 2007 -0400 [MAC80211]: improve key selection comment When I changed the code there I forgot to mention what happens with multicast frames in a regular BSS and keep wondering myself if the code is correct. Add appropriate comments. Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit e3e015d51d8c21ed169603d7232721b99a02e9f5 Author: John W. 
Linville Date: Tue Aug 28 17:01:55 2007 -0400 [MAC80211]: filter locally-originated multicast frames In STA mode, the AP will echo our traffic. This includes multicast traffic. Receiving these frames confuses some protocols and applications, notably IPv6 Duplicate Address Detection. Signed-off-by: John W. Linville Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit ebed62efecf2059b79efbf9db4de1a19e9f68e18 Author: Johannes Berg Date: Tue Aug 28 17:01:55 2007 -0400 [MAC80211]: clean up whitespace This cleans up some whitespace to make the mac80211 version in mainline diverge less from wireless-dev. Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit d0a083d3f08f0e6ce8263a75058edc93d43b72e8 Author: Johannes Berg Date: Tue Aug 28 17:01:55 2007 -0400 [MAC80211]: fix preamble setting It looks like in commit 28487a90 the condition was unintentionally negated by moving some code, fix it. Signed-off-by: Johannes Berg Cc: Daniel Drake Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit 36cdb3609ab195d6e60340cc9e17f7ce339a03d4 Author: Larry Finger Date: Tue Aug 28 17:01:55 2007 -0400 [MAC80211]: Remove overly sticky averaging filters for rssi, signal, noise The current version of wireless statistics contains a bug in the averaging that makes the numbers be too sticky and not react to small changes. This patch removes all averaging. Signed-off-by: Larry Finger Signed-off-by: Jiri Benc Signed-off-by: John W. Linville Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit bf212e80268d5718be95c1404da68a498e52d7c8 Author: Johannes Berg Date: Tue Aug 28 17:01:55 2007 -0400 [MAC80211]: add interface index to key debugfs Add a new file 'ifindex' to each key's debugfs dir to allow finding which interface the key was configured on. 
This isn't done as a symlink because of possible netdev name changes. Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit 91c8581c1576c574e36a4f9520d90bc0ecc2532d Author: Johannes Berg Date: Tue Aug 28 17:01:55 2007 -0400 [MAC80211]: rework key handling This moves all the key handling code out from ieee80211_ioctl.c into key.c and also does the following changes including documentation updates in mac80211.h: 1) Turn off hardware acceleration for keys when the interface is down. This is necessary because otherwise monitor interfaces could be decrypting frames for other interfaces that are down at the moment. Also, it should go some way towards better suspend/resume support, in any case the routines used here could be used for that as well. Additionally, this makes the driver interface nicer, keys for a specific local MAC address are only ever present while an interface with that MAC address is enabled. 2) Change driver set_key() callback interface to allow only return values of -ENOSPC, -EOPNOTSUPP and 0, warn on all other return values. This allows debugging the stack when a driver notices it's handed a key while it is down. 3) Invert the flag meaning to KEY_FLAG_UPLOADED_TO_HARDWARE. 4) Remove REMOVE_ALL_KEYS command as it isn't used nor do we want to use it, we'll use DISABLE_KEY for each key. It is hard to use REMOVE_ALL_KEYS because we can handle multiple virtual interfaces with different key configuration, so we'd have to keep track of a lot of state for this and that isn't worth it. 5) Warn when disabling a key fails, it mustn't. 6) Remove IEEE80211_HW_NO_TKIP_WMM_HWACCEL in favour of per-key IEEE80211_KEY_FLAG_WMM_STA to let driver sort it out itself. 7) Tell driver that a (non-WEP) key is used only for transmission by using an all-zeroes station MAC address when configuring. 8) Change the set_key() callback to have access to the local MAC address the key is being added for. 
Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit d77fb60dbd914584e9f9778a8dcc218a14efc229 Author: Johannes Berg Date: Tue Aug 28 17:01:55 2007 -0400 [MAC80211]: remove fake set_key() call Remove adding a fake key with a NONE key algorithm for each associated STA. If we have hardware with such TX filtering we should probably extend the sta_table_notification() callback with the sta information instead; the fact that it's treated as a key for some atheros hardware shouldn't bother the stack. Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit ba3625177ef839ba61fb93e4a01b482b77b13234 Author: Johannes Berg Date: Tue Aug 28 17:01:54 2007 -0400 [MAC80211] key handling: remove default_wep_only Remove the default_wep_only stuff, this wasn't really done well and no current driver actually cares. Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit fc1c9bf491e030b1774391055df8210e88fcecbc Author: Johannes Berg Date: Tue Aug 28 17:01:54 2007 -0400 [MAC80211]: remove krefs for keys they aren't really refcounted anyway Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit 2bed3e139d1797f3102ded87af05822f29875561 Author: Johannes Berg Date: Tue Aug 28 17:01:54 2007 -0400 [MAC80211]: embed key conf in key, fix driver interface This patch embeds the struct ieee80211_key_conf into struct ieee80211_key and thus avoids allocations and having data present twice. This required some more changes: 1) The removal of the IEEE80211_KEY_DEFAULT_TX_KEY key flag. This flag isn't used by drivers nor should it be since we have a set_key_idx() callback. Maybe that callback needs to be extended to include the key conf, but only a driver that requires it will tell. 2) The removal of the IEEE80211_KEY_DEFAULT_WEP_ONLY key flag. 
This flag is global, so it shouldn't be passed in the key conf structure. Pass it to the function instead. Also, this patch removes the AID parameter to the set_key() callback because it is currently unused and the hardware currently cannot know about the AID anyway. I suspect this was used with some hardware that actually selected the AID itself, but that functionality was removed. Additionally, I've removed the ALG_NULL key algorithm since we have ALG_NONE. Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit 069c2a057e04d65b90a1d32eb2222908e851f2f7 Author: Jiri Slaby Date: Tue Aug 28 17:01:54 2007 -0400 [MAC80211]: Remove bitfields from struct ieee80211_sub_if_data mac80211, remove bitfields from struct ieee80211_sub_if_data Signed-off-by: Jiri Slaby Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit dec5645abc1e814ea4b994595dd9df71409f4191 Author: Jiri Slaby Date: Tue Aug 28 17:01:54 2007 -0400 [MAC80211]: Remove bitfields from struct ieee80211_if_sta mac80211, remove bitfields from struct ieee80211_if_sta Signed-off-by: Jiri Slaby Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit a65b6a7ea6d01a88e3bda5918b6d1cf99d5b5337 Author: Jiri Slaby Date: Tue Aug 28 17:01:54 2007 -0400 [MAC80211]: Remove bitfields from struct ieee80211_txrx_data mac80211, remove bitfields from struct ieee80211_txrx_data Signed-off-by: Jiri Slaby Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. 
Miller commit 4793f158aa096319cf68d194ed3294e9477d78eb Author: Jiri Slaby Date: Tue Aug 28 17:01:54 2007 -0400 [MAC80211]: Remove bitfields from struct ieee80211_tx_packet_data remove bitfields from struct ieee80211_tx_packet_data [Johannes: completely clear flags in ieee80211_remove_tx_extra] Signed-off-by: Jiri Slaby Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit d2e7d9f107f5a09300744b49692d0c933e666e1a Author: Johannes Berg Date: Tue Aug 28 17:01:54 2007 -0400 [MAC80211]: use switch statement in tx code The transmit code needs to set the addresses depending on the interface type, a likely() for AP/VLAN is quite wrong since most people will be using STA; convert to a switch statement to make it look nicer. Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit 496896ffb18206da68b4edad7900afeb04fa732d Author: Johannes Berg Date: Tue Aug 28 17:01:53 2007 -0400 [MAC80211]: refactor event sending Create a new file event.c that will contain code to send mac/mlme events to userspace. For now put the Michael MIC failure condition into it and remove sending of that condition via the management interface, hostapd interestingly doesn't do anything when it gets such a packet besides printing a message, it reacts only on the private iwevent. Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit 850e661de84e45021e67fa8e5cd2a7e7f2433b9c Author: Johannes Berg Date: Tue Aug 28 17:01:53 2007 -0400 [MAC80211]: kill key_mgmt, use key_management_enabled The key_mgmt variable for STA interfaces doesn't seem well-defined nor do we actually use the values other than "NONE", so change it to be named better. Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. 
Miller commit b5eef3c2cda6965da798c9cf87e8a2738b1ca524 Author: Johannes Berg Date: Tue Aug 28 17:01:53 2007 -0400 [MAC80211]: remove unused ioctls (3) The ioctls * PRISM2_PARAM_RADAR_DETECT * PRISM2_PARAM_SPECTRUM_MGMT are not used by hostapd or wpa_supplicant, Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit bd34fcbbb4549c3690b433e5f1acf1924d32a51e Author: Johannes Berg Date: Tue Aug 28 17:01:53 2007 -0400 [MAC80211]: remove unused ioctls (2) The ioctls * PRISM2_PARAM_STA_ANTENNA_SEL * PRISM2_PARAM_TX_POWER_REDUCTION * PRISM2_PARAM_DEFAULT_WEP_ONLY are not used by hostapd or wpa_supplicant. Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit f12f73dff6e39c65fd80fdd757567cf495996241 Author: Johannes Berg Date: Tue Aug 28 17:01:53 2007 -0400 [MAC80211]: remove unused ioctls (1) The ioctls * PRISM2_PARAM_ANTENNA_MODE * PRISM2_PARAM_STAT_TIME are not used by hostapd or wpa_supplicant. Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit 5dcf7ba37f8eec04b46460fda7343857e1540e94 Author: Johannes Berg Date: Tue Aug 28 17:01:53 2007 -0400 [MAC80211]: fix software decryption When doing key selection for software decryption, mac80211 gets a few things wrong: it always uses pairwise keys if configured, even if the frame is addressed to a multicast address. Also, it doesn't allow using a key index of zero if a pairwise key has also been found. This patch changes the key selection code to be (more) in line with the 802.11 specification. I have confirmed that with this, multicast frames are correctly decrypted and I've tested with WEP as well. 
While at it, I've cleaned up the semantics of the hardware flags IEEE80211_HW_WEP_INCLUDE_IV and IEEE80211_HW_DEVICE_HIDES_WEP and clarified them in the mac80211.h header; it is also now allowed to set the IEEE80211_HW_DEVICE_HIDES_WEP option even if it only applies to frames that have been decrypted by the hw, unencrypted frames must be dropped but encrypted frames that the hardware couldn't handle can be passed up unmodified. Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville commit 94add00428ca975cd417c420c1f0fffc9b007009 Author: Johannes Berg Date: Tue Aug 28 17:01:53 2007 -0400 [MAC80211]: remove radar stuff Unused in drivers, userspace and mac80211. Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit 931423ade87ee066219641e8be25f7c9b15ac04a Author: Johannes Berg Date: Tue Aug 28 17:01:53 2007 -0400 [MAC80211]: remove ieee80211_msg_wep_frame_unknown_key Neither hostapd nor wpa_supplicant really use it. Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville commit af3caf4fba63e7580021808b7605ad34143e15be Author: Johannes Berg Date: Tue Aug 28 17:01:52 2007 -0400 [MAC80211]: ratelimit some RX messages Many if not all of these messages can be triggered by sending a few rogue frames which is trivially done and then we overflow our logs. Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit 93765f7c7ea4c579ade25271a600c27ebc70aa56 Author: Johannes Berg Date: Tue Aug 28 17:01:52 2007 -0400 [MAC80211]: remove IEEE80211_HW_DATA_NULLFUNC_ACK Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit abcd33efdf34d947d0370109a1bdf5baef0bd233 Author: Johannes Berg Date: Tue Aug 28 17:01:52 2007 -0400 [MAC80211]: remove PRISM2_PARAM_RADIO_ENABLED This now is unused in hostapd/wpa_supplicant. Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. 
Linville Signed-off-by: David S. Miller commit 08d3f4b63b33760c3f8c33b7d8f5c9553fe5ec74 Author: Johannes Berg Date: Tue Aug 28 17:01:52 2007 -0400 [MAC80211]: remove IEEE80211_HW_HOST_GEN_BEACON flag The flag is never checked because drivers can simply call ieee80211_beacon_get() regardless of setting this flag. Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit 0e5e0d4b310d4254425b6c54d793a1a96eb87318 Author: Johannes Berg Date: Tue Aug 28 17:01:52 2007 -0400 [MAC80211]: remove reset callback The callback isn't used so remove it. Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit 98fa93109f4220d6d57c6e93132012e9ce49daab Author: Johannes Berg Date: Tue Aug 28 17:01:52 2007 -0400 [MAC80211]: fix key debugfs This fixes two issues with the key debugfs: 1) key index obviously isn't unique 2) various missing break statements led to bogus output Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit 193f147c7433489045dcc3ca561bce0335860794 Author: Johannes Berg Date: Tue Aug 28 17:01:52 2007 -0400 [MAC80211]: avoid copying packets to interfaces that are down David Woodhouse noticed that under some circumstances the number of slab allocations kept growing. After looking a bit, this seemed to happen when you had a management mode interface that was *down*. The reason for this is that when the device is down, all management frames get queued to the in-kernel MLME (via ieee80211_sta_rx_mgmt) but then the sta work is invoked but doesn't run when the netif is down. When you then bring the interface up, all such frames are freed, but if you change the mode all of them are lost because the skb queue is reinitialised as soon as you go back to managed mode. 
The skb queue is correctly cleared when the interface is brought down, but the code doesn't account for the fact that it may be filled while it is not up. This patch should fix the issue by simply ignoring all interfaces that are down when going through the RX handlers. Signed-off-by: Johannes Berg Acked-by: Michael Wu Signed-off-by: John W. Linville Signed-off-by: David S. Miller commit 459da6a660fa1aa0b753466c3cf534274edcd9a3 Author: Alex Villacís Lasso Date: Tue Aug 28 15:58:31 2007 -0700 [IrDA]: Kingsun KS-959 IrDA USB driver This dongle does not follow the usb-irda specification, so it needs its own special driver. First, it uses control URBs for data transfer, instead of bulk or interrupt transfers; the only interrupt endpoint exposed seems to be a dummy to prevent the interface from being rejected. Second, it uses obfuscation and padding at the USB traffic level, for no apparent reason other than to make reverse engineering harder (full details on obfuscation in comments at beginning of source). Although it is advertised as a "4 Mbps FIR dongle", it apparently loses packets at speeds greater than 57600 bps. On plugin, this dongle reports vendor and device IDs: 0x07d0:0x4959 . The Windows driver that is used normally to control this dongle has a filename of KS-959.SYS . Signed-off-by: Alex Villacís Lasso Signed-off-by: Samuel Ortiz Signed-off-by: David S. Miller commit 93585f760b6e58df896ae92f5d1b8b3fab706d3c Author: Alex Villacís Lasso Date: Tue Aug 28 15:57:50 2007 -0700 [IrDA]: Kingsun Dazzle IrDA USB driver This dongle does not follow the usb-irda specification, so it needs its own special driver. Just like the Kingsun/Donshine dongle, it exposes two interrupt endpoints. Reception is performed through direct reads from the input endpoint. Transmission requires splitting the IrDA frames into 8-byte segments, in which the first byte encodes how many of the remaining 7 bytes are used as data. 
Speed change is made with a control URB just like the one in cypress_m8, and it seems to support up to 115200 bps. On plugin, this dongle reports vendor and device IDs: 0x07d0:0x4100 Signed-off-by: Alex Villacís Lasso Signed-off-by: Samuel Ortiz Signed-off-by: David S. Miller commit a485da72e17c8391120f6802a1f968f73494c139 Author: Samuel Ortiz Date: Tue Aug 28 15:57:12 2007 -0700 [IrDA]: MSG_NOSIGNAL support for IrDA sockets Signed-off-by: Samuel Ortiz Signed-off-by: David S. Miller commit 20fccfff42e3da5ba46d5d5744c3eac247c09792 Author: Ilpo Järvinen Date: Tue Aug 28 15:50:33 2007 -0700 [NET]: DIV_ROUND_UP cleanup (part two) Hopefully captured all single statement cases under net/. I'm not too sure if there is some policy about #includes that are "guaranteed" (ie., in the current tree) to be available through some other #included header, so I just added linux/kernel.h to each changed file that didn't #include it previously. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller commit f680ec9795914e83c99b3f51979d93a0d4d7b0ae Author: Robert Olsson Date: Tue Aug 28 15:48:35 2007 -0700 [SOFTIRQ]: Remove do_softirq() symbol export. As noted by Christoph Hellwig, pktgen was the only user so it can now be removed. Signed-off-by: Robert Olsson Signed-off-by: David S. Miller commit 4b38bc11f572f95db46073dbc8fe177e0e7be3a8 Author: Robert Olsson Date: Tue Aug 28 15:46:58 2007 -0700 [PKTGEN]: Remove softirq scheduling. It's not a job for pktgen. Signed-off-by: Robert Olsson Signed-off-by: David S. Miller commit bbaa3e3f05c6159e1fa59f1e1c60b97a5915c3e9 Author: Robert Olsson Date: Tue Aug 28 15:45:55 2007 -0700 [PKTGEN]: Multiqueue support. Below some pktgen support to send into different TX queues. This can of course be fed into input queues on other machines Signed-off-by: Robert Olsson Signed-off-by: David S. Miller commit cd86303183bddeddf55a2d2639a7317918418b51 Author: Robert Olsson Date: Tue Aug 28 15:43:14 2007 -0700 [PKTGEN]: Fix multiqueue oops. 
Initially pkt_dev can be NULL this causes netif_subqueue_stopped to oops. The patch below should cure it. But maybe the pktgen TX logic should be reworked to better support the new multiqueue support. Signed-off-by: Robert Olsson Signed-off-by: David S. Miller commit 2a70e39c590a2b90ea283c30f51b5ec1d56629ad Author: Denis Cheng Date: Tue Aug 28 15:41:11 2007 -0700 [NETLINK]: use container_of instead This could make future redesign of struct netlink_sock easier. Signed-off-by: Denis Cheng Signed-off-by: David S. Miller commit 9c7b70488362ed205072653caac48be3ea9f4168 Author: Thomas Graf Date: Sat Aug 25 13:46:55 2007 -0700 [XFRM] policy: Replace magic number with XFRM_POLICY_OUT Signed-off-by: Thomas Graf Signed-off-by: David S. Miller commit 0f945e735ea8c9052bf7ed2772cba7d92a8d3052 Author: Masahide NAKAMURA Date: Fri Aug 24 23:33:01 2007 -0700 [IPV4] IPSEC: Omit redirect for tunnelled packet. IPv4 IPsec tunnel gateway incorrectly sends redirect to sender if it is onlink host when network device the IPsec tunnelled packet is arrived is the same as the one the decapsulated packet is sent. With this patch, it omits to send the redirect when the forwarding skbuff carries secpath, since such skbuff should be assumed as a decapsulated packet from IPsec tunnel by own. Request for comments: Alternatively we'd have another way to change net/ipv4/route.c (__mkroute_input) to use RTCF_DOREDIRECT flag unless skbuff has no secpath. It is better than this patch at performance point of view because IPv4 redirect judgement is done at routing slow-path. However, it should be taken care of resource changes between SAD(XFRM states) and routing table. In other words, When IPv4 SAD is changed does the related routing entry go to its slow-path? If not, it is reasonable to apply this patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: David S. 
Miller commit bc8f4748287b55eef1a3b0d803d1f7130f55e89b Author: Masahide NAKAMURA Date: Fri Aug 24 19:08:55 2007 +0900 [IPV6] IPSEC: Omit redirect for tunnelled packet. IPv6 IPsec tunnel gateway incorrectly sends redirect to router or sender when network device the IPsec tunnelled packet is arrived is the same as the one the decapsulated packet is sent. With this patch, it omits to send the redirect when the forwarding skbuff carries secpath, since such skbuff should be assumed as a decapsulated packet from IPsec tunnel by own. It may be a rare case for an IPsec security gateway, however it is not rare when the gateway is MIPv6 Home Agent since the another tunnel end-point is Mobile Node and it changes the attached network. Signed-off-by: Masahide NAKAMURA Signed-off-by: David S. Miller commit a85d5450ddeb959bdf9e4603f9c06e9d79217cfa Author: Noriaki TAKAMIYA Date: Fri Aug 24 23:31:39 2007 -0700 [IPV6] XFRM: Fix connected socket to use transformation. When XFRM policy and state are ready after TCP connection is started, the traffic should be transformed immediately, however it does not on IPv6 TCP. It depends on a dst cache replacement policy with connected socket. It seems that the replacement is always done for IPv4, however, on IPv6 case it is done only when routing cookie is changed. This patch fix that non-transformation dst can be changed to transformation one. This behavior is required by MIPv6 and improves IPv6 IPsec. Signed-off-by: Noriaki TAKAMIYA Signed-off-by: Masahide NAKAMURA Signed-off-by: David S. Miller commit e8b932bd2a8209e23d18bbae9b1d63d8b6b290ec Author: Masahide NAKAMURA Date: Fri Aug 24 23:30:47 2007 -0700 [XFRM]: Fix pointer copy size for encap_tmpl and coaddr. This is minor fix about sizeof argument using with kmemdup(). Signed-off-by: Masahide NAKAMURA Signed-off-by: David S. 
Miller commit 90c720c250587a256590dc8f320fcc3411b41d01 Author: Brian Haley Date: Fri Aug 24 23:16:08 2007 -0700 [IPV6]: Add v4mapped address inline Add v4mapped address inline to avoid calls to ipv6_addr_type(). Signed-off-by: Brian Haley Signed-off-by: David S. Miller commit c889b33bb1f6fae105ab7a1d27704f7940d98597 Author: Stephen Hemminger Date: Fri Aug 24 23:10:44 2007 -0700 [SHAPER]: Mark for removal. This driver has been marked obsolete for a long time and is superseded by traffic schedulers. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller commit 2de9a58adf2dbdf80ec7145a49bc47cc28c7895a Author: Stephen Hemminger Date: Fri Aug 24 23:09:41 2007 -0700 [UDP]: Randomize port selection. This patch causes UDP port allocation to be randomized like TCP. The earlier code would always choose same port (ie first empty list). Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller commit ad6584d4cc469adf13d3835821399370dc3a5a12 Author: Ilpo Järvinen Date: Fri Aug 24 23:00:31 2007 -0700 [NET] Cleanup: DIV_ROUND_UP Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller commit cb25b535738122b30a313c4d1c58ad09f0e2c1a6 Author: Ilpo Järvinen Date: Fri Aug 24 22:55:52 2007 -0700 [TCP] MIB: Add counters for discarded SACK blocks In DSACK case, some events are not extraordinary, such as packet duplication generated DSACK. They can arrive easily below snd_una when undo_marker is not set (TCP being in CA_Open), counting such DSACKs among SACK discards will likely just mislead if they occur in some scenario when there are other problems as well. Similarly, excessively delayed packets could cause "normal" DSACKs. Therefore, separate counters are allocated for DSACK events. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. 
Miller commit 30f4a5d9dde5e2a4238c371a34996b5011c6ce18 Author: Ilpo Järvinen Date: Fri Aug 24 22:54:44 2007 -0700 [TCP]: Discard fuzzy SACK blocks SACK processing code has been a sort of russian roulette as no validation of SACK blocks is previously attempted. Besides, it is not very clear what all kinds of broken SACK blocks really mean (e.g., one that has start and end sequence numbers reversed). So now close the roulette once and for all. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller commit 34d9773137f36b3df47650eb5fc3c0da4ba06934 Author: Ilpo Järvinen Date: Fri Aug 24 22:53:26 2007 -0700 [TCP]: Rename tcp_ack_packets_out -> tcp_rearm_rto Only thing that tiny function does is rearming the RTO (if necessary), name it accordingly. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller commit eac2ce468ab52293e45eccbaf465d947b3f8cf74 Author: Ilpo Järvinen Date: Fri Aug 24 22:44:06 2007 -0700 [TCP]: tcp_packets_out_inc to tcp_output.c (no callers elsewhere) Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller commit 546dca415237cd669b04da4b51d9baa4b8c0d004 Author: Ilpo Järvinen Date: Fri Aug 24 22:43:14 2007 -0700 [TCP]: Remove unnecessary wrapper tcp_packets_out_dec Makes caller side more obvious, there's no need to have a wrapper for this oneliner! Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller commit 819a910d2efc7a8c82194ae780b7cd6d84203838 Author: Stephen Hemminger Date: Tue Aug 28 15:22:09 2007 -0700 [ATM]: Replace DPRINTK() with pr_debug(). Get rid of using DPRINTK macro in ATM and use pr_debug (in kernel.h). Using the standard macro is cleaner and forces code to check for bad arguments and formatting. Fixes from Thomas Graf. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller commit 32c5b4edb483ba3c9dd8490dfeac79bd90c2c406 Author: Stephen Hemminger Date: Fri Aug 24 22:35:44 2007 -0700 [NET] ethernet: optimize memcpy and memset The ethernet header management only needs to handle a fixed size address (6 bytes). 
If the memcpy/memset are changed to be passed a constant length, then compiler can optimize for this case (and if it is smart eliminate string instructions). Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller commit 2f97c0ec6114e6acbc54cbb01d4164e2b86f78e1 Author: Stephen Hemminger Date: Wed Aug 22 14:43:38 2007 -0700 [SKY2]: Minor warning fix. Fix minor warning about const changes. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller commit f25b801bead57b915ee67ed325ab434afbd91137 Author: Thomas Graf Date: Wed Aug 22 14:03:27 2007 -0700 [XFRM] netlink: Inline attach_encap_tmpl(), attach_sec_ctx(), and attach_one_addr() These functions are only used once and are a lot easier to understand if inlined directly into the function. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller commit 5acc3441322aaa9a878dca67e50e73bcd1779a0d Author: Thomas Graf Date: Wed Aug 22 14:02:39 2007 -0700 [XFRM] netlink: Remove dependency on rtnetlink Signed-off-by: Thomas Graf Signed-off-by: David S. Miller commit bc2c96f580fc8e7cfc2784549c33b8c8fda42766 Author: Thomas Graf Date: Wed Aug 22 14:02:11 2007 -0700 [XFRM] netlink: Use nla_memcpy() in xfrm_update_ae_params() Signed-off-by: Thomas Graf Signed-off-by: David S. Miller commit 0206676f93886ddad84133f75445205295c92908 Author: Thomas Graf Date: Wed Aug 22 14:01:33 2007 -0700 [XFRM] netlink: Use nlattr instead of rtattr Signed-off-by: Thomas Graf Signed-off-by: David S. Miller commit 3a63d5eac19697a5f9e8d0587bc5bfc8ccf55700 Author: Thomas Graf Date: Wed Aug 22 14:00:40 2007 -0700 [XFRM] netlink: Rename attribute array from xfrma[] to attrs[] Increases readability a lot. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller commit 45dc0ba49fcc02266d38912c7a4940f6d1d53e0d Author: Thomas Graf Date: Wed Aug 22 13:59:43 2007 -0700 [XFRM] netlink: Enhance indexing of the attribute array nlmsg_parse() puts attributes at array[type] so the indexing method can be simplified by removing the obscuring "- 1". 
Signed-off-by: Thomas Graf Signed-off-by: David S. Miller commit cf799e8a1a47703edf18e4e485853399a4ad64c7 Author: Thomas Graf Date: Wed Aug 22 13:59:04 2007 -0700 [XFRM] netlink: Establish an attribute policy Adds a policy defining the minimal payload lengths for all the attributes allowing for most attribute validation checks to be removed from in the middle of the code path. Makes updates more consistent as many format errors are recognised earlier, before any changes have been attempted. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller commit e4edbd80c901d64030bca4bbd05d210bf40e4718 Author: Thomas Graf Date: Wed Aug 22 13:58:18 2007 -0700 [XFRM] netlink: Use nlmsg_parse() to parse attributes Uses nlmsg_parse() to parse the attributes. This actually changes behaviour as unknown attributes (type > MAXTYPE) no longer cause an error. Instead unknown attributes will be ignored henceforth to keep older kernels compatible with more recent userspace tools. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller commit cd82f837580abf07d6ea52a5018a445909250440 Author: Thomas Graf Date: Wed Aug 22 13:57:39 2007 -0700 [XFRM] netlink: Use nlmsg_new() and type-safe size calculation helpers Moves all complex message size calculation into own inlined helper functions and makes use of the type-safe netlink interface. Using nlmsg_new() simplifies the calculation itself as it takes care of the netlink header length by itself. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller commit f5f4edc8077da98ccbb88abe135b94f447f0e011 Author: Thomas Graf Date: Wed Aug 22 13:57:04 2007 -0700 [XFRM] netlink: Clear up some of the CONFIG_XFRM_SUB_POLICY ifdef mess Moves all of the SUB_POLICY ifdefs related to the attribute size calculation into a function. Signed-off-by: Thomas Graf Signed-off-by: David S. 
Miller commit 92f19ffae003b9e33cbaa2c02f104da000f4bdbc Author: Thomas Graf Date: Wed Aug 22 13:56:23 2007 -0700 [XFRM] netlink: Move algorithm length calculation to its own function Adds alg_len() to calculate the properly padded length of an algorithm attribute to simplify the code. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller commit a602f3322a7ee7ea3165689a6de9ac121ae417fe Author: Thomas Graf Date: Wed Aug 22 13:55:43 2007 -0700 [XFRM] netlink: Use nla_put()/NLA_PUT() variantes Also makes use of copy_sec_ctx() in another place and removes duplicated code. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller commit f9a73a0d91792f163afbb7e430f62efea9e15981 Author: Thomas Graf Date: Wed Aug 22 13:54:36 2007 -0700 [XFRM] netlink: Use nlmsg_broadcast() and nlmsg_unicast() This simplifies successful return codes from >0 to 0. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller commit de4d67a9a31b0db5f4edb9dbe995590aeaf2553f Author: Thomas Graf Date: Wed Aug 22 13:53:52 2007 -0700 [XFRM] netlink: Use nlmsg_data() instead of NLMSG_DATA() Signed-off-by: Thomas Graf Signed-off-by: David S. Miller commit 7088347221b8abf607b93d68ff4da9f3b0d5bdbc Author: Thomas Graf Date: Wed Aug 22 12:47:26 2007 -0700 [XFRM] netlink: Use nlmsg_end() and nlmsg_cancel() Signed-off-by: Thomas Graf Signed-off-by: David S. Miller commit 8ccdcbaf55ec439a5ede00dd5146692fa1836010 Author: Thomas Graf Date: Wed Aug 22 12:46:53 2007 -0700 [XFRM] netlink: Use nlmsg_put() instead of NLMSG_PUT() Signed-off-by: Thomas Graf Signed-off-by: David S. Miller commit be6470d3a26fa385bc137a75da7fa8ad59a6bcae Author: Stephen Hemminger Date: Wed Aug 22 12:45:46 2007 -0700 [NET] netdevice: kernel docbook addition Add more kernel doc's for part of the network device API. This is only a start, and needs more work. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller commit 01bcd6d10e8b64ae9e25235ec25bc55af8c09e37 Author: Arnaldo Carvalho de Melo Date: Sun Aug 19 17:19:44 2007 -0700 [DCCP]: Nuke the timeval helpers now that we fully converted to ktime_t Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller commit 9a13b908134512e8dbef57a1a93d4ca7ce352038 Author: Arnaldo Carvalho de Melo Date: Sun Aug 19 17:19:21 2007 -0700 [DCCP]: Nuke dccp_timestamp and dccps_epoch, not used anymore Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller commit 620bfa5fb2835621b8337ff04438d087ae9ff34a Author: Arnaldo Carvalho de Melo Date: Sun Aug 19 17:18:55 2007 -0700 [DCCP] options: convert dccp_insert_option_timestamp to ktime_t Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller commit feba9a5a148f0750b557ea7d384a954cfa283443 Author: Arnaldo Carvalho de Melo Date: Sun Aug 19 17:18:33 2007 -0700 [DCCP]: Convert dccps_timestamp_time to ktime_t Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller commit ceec6ead5c0867b9238bbc0c419768f92b9daf50 Author: Arnaldo Carvalho de Melo Date: Sun Aug 19 17:18:13 2007 -0700 [DCCP] packet_history: Convert dccphtx_tstamp to ktime_t Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller commit 776f6410637b51cb87324bc2961e87e75fc0265a Author: Arnaldo Carvalho de Melo Date: Sun Aug 19 17:17:51 2007 -0700 [DCCP] packet_history: convert dccphrx_tstamp to ktime_t Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller commit 8112cb02dde0a17f1de7d7fc01c0d49a73cf77c5 Author: Arnaldo Carvalho de Melo Date: Sun Aug 19 17:17:25 2007 -0700 [DCCP] ackvec: Convert to ktime_t Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller commit d8cede928f3a90719746faff4b6e570aa8f2afb2 Author: Arnaldo Carvalho de Melo Date: Sun Aug 19 17:17:02 2007 -0700 [DCCP] CCID3: Stop using dccp_timestamp Now to convert the ackvec code to ktime_t so that we can get rid of dccp_timestamp and the epoch thing in dccp_sock. 
Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller commit fe12c90818a84636df2a32359ebf376d20d73b5d Author: Arnaldo Carvalho de Melo Date: Sun Aug 19 17:16:35 2007 -0700 [DCCP]: Convert dccp_sample_rtt to ktime_t Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller commit 157c36c951e36dd2939bafb9e050d4e7d87dd820 Author: Arnaldo Carvalho de Melo Date: Sun Aug 19 17:16:05 2007 -0700 [KTIME]: Introduce ktime_sub_ns and ktime_sub_us First user will be the DCCP transport networking protocol. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller commit 290c52838d77d5d33ed0c0fb12a4883510bfebcc Author: Arnaldo Carvalho de Melo Date: Sun Aug 19 17:15:37 2007 -0700 [DCCP]: Convert ccid3hcrx_tstamp_last_feedback to ktime_t Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller commit 1ad683c6c7ca809a87891bf2b8887ed8214a9bc0 Author: Arnaldo Carvalho de Melo Date: Sun Aug 19 17:15:13 2007 -0700 [DCCP]: Convert ccid3hcrx_tstamp_last_ack to ktime_t Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller commit 67e2b1d1d6fe93a23a77d3f29d8e1651ff15bbff Author: Arnaldo Carvalho de Melo Date: Sun Aug 19 17:14:52 2007 -0700 [DCCP]: Convert ccid3hctx_t_ld to ktime_t Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller commit 87c2d8039beaafa34e4fe3c556a515ac0680ddd0 Author: Arnaldo Carvalho de Melo Date: Sun Aug 19 17:14:27 2007 -0700 [DCCP]: Make ccid3_hc_tx_update_x get a timestamp if needed The code was too complicated, if p > 0 in ccid3_hc_tx_no_feedback_timer the timestamp was being obtained to be passed to ccid3_hc_tx_update_x, where only if p > 0 the timestamp was needed, so just leave it to ccid3_hc_tx_update_x to obtain the timestamp if needed. This will help in the upcoming changesets where we'll convert t_ld to ktime_t. We'll eventually try to reuse ktime_get_real() calls again. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller commit a40289755fa8f3c9e895b4fde7e83ebdba788833 Author: Neil Horman Date: Wed Aug 15 16:07:44 2007 -0700 [SCTP]: Rewrite of sctp buffer management code This patch introduces autotuning to the sctp buffer management code similar to the TCP. The buffer space can be grown if the advertised receive window still has room. This might happen if small message sizes are used, which is common in telecom environments. New tunables are introduced that provide limits to buffer growth and memory pressure is entered if too much buffer space is used. Signed-off-by: Neil Horman Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller commit 95628ee6e70ddbc9795456046eb63c99a5b05e2a Author: Jeff Garzik Date: Wed Aug 15 16:01:56 2007 -0700 [ETHTOOL]: Internal cleanup of ethtool_value-related handlers Several get/set functions can be handled by passing the ethtool_op function pointer directly to a generic function. This permits deletion of a fair bit of redundant code. Signed-off-by: Jeff Garzik Signed-off-by: David S. Miller commit cdda49d2886d9167b6c75c30d1cd60edf592aab2 Author: Jeff Garzik Date: Wed Aug 15 16:01:32 2007 -0700 [ETHTOOL]: Introduce ->{get,set}_priv_flags, ETHTOOL_[GS]PFLAGS Signed-off-by: Jeff Garzik Signed-off-by: David S. Miller commit b6b3abe88e13ce11ac77976e229f10feed583ee6 Author: Jeff Garzik Date: Wed Aug 15 16:01:08 2007 -0700 [ETHTOOL]: Introduce get_sset_count. Obsolete get_stats_count, self_test_count Signed-off-by: Jeff Garzik Signed-off-by: David S. Miller commit ca54af327a8b8bf2510268f751cc176081f17e8f Author: Jeff Garzik Date: Wed Aug 15 16:00:51 2007 -0700 [ETHTOOL]: Add ETHTOOL_[GS]FLAGS sub-ioctls Signed-off-by: Jeff Garzik Signed-off-by: David S. Miller commit aef8d975ddf703f8065efc6f5aaf27368c17f1b6 Author: Satyam Sharma Date: Fri Aug 10 15:35:05 2007 -0700 [NET] netconsole: Support dynamic reconfiguration using configfs Based upon initial work by Keiichi Kii .
This patch introduces support for dynamic reconfiguration (adding, removing and/or modifying parameters of netconsole targets at runtime) using a userspace interface exported via configfs. Documentation is also updated accordingly. Issues and brief design overview: (1) Kernel-initiated creation / destruction of kernel objects is not possible with configfs -- the lifetimes of the "config items" is managed exclusively from userspace. But netconsole must support boot/module params too, and these are parsed in kernel and hence netpolls must be setup from the kernel. Joel Becker suggested to separately manage the lifetimes of the two kinds of netconsole_target objects -- those created via configfs mkdir(2) from userspace and those specified from the boot/module option string. This adds complexity and some redundancy here and also means that boot/module param-created targets are not exposed through the configfs namespace (and hence cannot be updated / destroyed dynamically). However, this saves us from locking / refcounting complexities that would need to be introduced in configfs to support kernel-initiated item creation / destroy there. (2) In configfs, item creation takes place in the call chain of the mkdir(2) syscall in the driver subsystem. If we used an ioctl(2) to create / destroy objects from userspace, the special userspace program is able to fill out the structure to be passed into the ioctl and hence specify attributes such as local interface that are required at the time we set up the netpoll. For configfs, this information is not available at the time of mkdir(2). So, we keep all newly-created targets (via configfs) disabled by default. The user is expected to set various attributes appropriately (including the local network interface if required) and then write(2) "1" to the "enabled" attribute. Thus, netpoll_setup() is then called on the set parameters in the context of _this_ write(2) on the "enabled" attribute itself. 
This design enables the user to reconfigure existing netconsole targets at runtime to be attached to newly-come-up interfaces that may not have existed when netconsole was loaded or when the targets were actually created. All this effectively enables us to get rid of custom ioctls. (3) Ultra-paranoid configfs attribute show() and store() operations, with sanity and input range checking, using only safe string primitives, and compliant with the recommendations in Documentation/filesystems/sysfs.txt. (4) A new function netpoll_print_options() is created in the netpoll API, that just prints out the configured parameters for a netpoll structure. netpoll_parse_options() is modified to use that and it is also exported to be used from netconsole. Signed-off-by: Satyam Sharma Acked-by: Keiichi Kii Signed-off-by: Andrew Morton Signed-off-by: David S. Miller commit 62fbf71122fb83e62ee6170432f5b7a4d708d199 Author: Satyam Sharma Date: Fri Aug 10 15:33:40 2007 -0700 [NET] netconsole: Support multiple logging targets Based upon initial work by Keiichi Kii . This patch introduces support for multiple targets, independent of CONFIG_NETCONSOLE_DYNAMIC -- this is useful even in the default case and (including the infrastructure introduced in previous patches) doesn't really add too many bytes to module text. All the complexity (and size) comes with the dynamic reconfigurability / userspace interface patch, and so it's plausible users may want to keep this enabled but that disabled (say to avoid a dependency on CONFIG_CONFIGFS_FS too). Also update documentation to mention the use of ";" separator to specify multiple logging targets in the boot/module option string. Brief overview: We maintain a target_list (and corresponding lock). Get rid of the static "default_target" and introduce allocation and release functions for our netconsole_target objects (but keeping sure to preserve previous behaviour such as default values). 
During init_netconsole(), ";" is used as the separator to identify multiple target specifications in the boot/module option string. The target specifications are parsed and netpolls setup. During exit, the target_list is torn down and all items released. Signed-off-by: Satyam Sharma Signed-off-by: Keiichi Kii Signed-off-by: Andrew Morton Signed-off-by: David S. Miller commit 2e0e975f01d67281fc10f6f090f67b800e2b3270 Author: Satyam Sharma Date: Fri Aug 10 15:33:01 2007 -0700 [NET] netconsole: Introduce netconsole_netdev_notifier Based upon initial work by Keiichi Kii . To update fields of underlying netpoll structure at runtime on corresponding NETDEV_CHANGEADDR or NETDEV_CHANGENAME notifications. ioctl(SIOCSIFHWADDR or SIOCSIFNAME) could be used to change the hardware/MAC address or name of the local interface that our netpoll is attached to. Whenever this happens, netdev notifier chain is called out with the NETDEV_CHANGEADDR or NETDEV_CHANGENAME event message. We respond to that and update the local_mac or dev_name field of the struct netpoll. This makes sense anyway, but is especially required for dynamic netconsole because the netpoll structure's internal members become user visible files when either sysfs or configfs are used. So this helps us to keep up with the MAC address/name changes and keep values in struct netpoll uptodate. [ Note that ioctl(SIOCSIFADDR) to change IP address of interface at runtime is not handled (to update local_ip of netpoll) on purpose -- some setups may set the local_ip to a private address, not necessary the actual IP address of the sender host, as presently allowed. ] Signed-off-by: Satyam Sharma Signed-off-by: Keiichi Kii Signed-off-by: Andrew Morton Signed-off-by: David S. Miller commit 9846d299b4aafebeda37435eb2ae7042d090d509 Author: Satyam Sharma Date: Fri Aug 10 15:32:14 2007 -0700 [NET] netconsole: Introduce netconsole_target Based upon initial work by Keiichi Kii . 
Introduce a wrapper structure over netpoll to represent logging targets configured in netconsole. This will get extended with other members in further patches. This is done independent of the (to-be-introduced) NETCONSOLE_DYNAMIC config option so that we're able to drastically cut down on the #ifdef complexity of final netconsole.c. Also, struct netconsole_target would be required for multiple targets support also, and not just dynamic reconfigurability. Signed-off-by: Satyam Sharma Signed-off-by: Keiichi Kii Signed-off-by: Andrew Morton Signed-off-by: David S. Miller commit 3f4da0e41801cc9354ef9c8de6332e8bf8a43386 Author: Satyam Sharma Date: Fri Aug 10 15:31:19 2007 -0700 [NET] netconsole: Add some useful tips to documentation Based upon initial work by Keiichi Kii . Add some useful general-purpose tips. Also suggest solution for the frequent problem of console loglevel set too low numerically (i.e. for high priority messages only) on the sender. Signed-off-by: Satyam Sharma Acked-by: Keiichi Kii Acked-by: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: David S. Miller commit ae1a3bf27122dedbd87c1baae7d6d7e920a3a46d Author: Satyam Sharma Date: Fri Aug 10 15:30:31 2007 -0700 [NET] netconsole: Use netif_running() in write_msg() Based upon initial work by Keiichi Kii . Avoid unnecessarily disabling interrupts and calling netpoll_send_udp() if the corresponding local interface is not up. Signed-off-by: Satyam Sharma Acked-by: Keiichi Kii Signed-off-by: Andrew Morton Signed-off-by: David S. Miller commit 37aa447239b5ad7dbcfbb2595e684b2de812da2e Author: Satyam Sharma Date: Fri Aug 10 15:29:47 2007 -0700 [NET] netconsole: Simplify boot/module option setup logic Based upon initial work by Keiichi Kii . 
Presently, boot/module parameters are set up quite differently for the case of built-in netconsole (__setup() -> obsolete_checksetup() -> netpoll_parse_options() -> strlen(config) == 0 in init_netconsole()) vs modular netconsole (module_param_string() -> string copied to the config variable -> strlen(config) != 0 init_netconsole() -> netpoll_parse_options()). This patch makes both of them similar by doing exactly the equivalent of a module_param_string() in option_setup() also -- just copying the param string passed from the kernel command line into "config" variable. So, strlen(config) != 0 in both cases, and netpoll_parse_options() is always called from init_netconsole(), thus making the setup logic for both cases similar. Now, option_setup() is only ever called / used for the built-in case, so we put it inside a #ifndef MODULE, otherwise gcc will complain about option_setup() being "defined but not used". Also, the "configured" variable is redundant with this patch and hence removed. Signed-off-by: Satyam Sharma Signed-off-by: Keiichi Kii Acked-by: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: David S. Miller commit 80b8558811d453f6547980809c7857db1c1b2ccd Author: Satyam Sharma Date: Fri Aug 10 15:28:10 2007 -0700 [NET] netconsole: Remove bogus check Based upon initial work by Keiichi Kii . The (!np.dev) check in write_msg() is bogus (always false), because: np.dev is set by netpoll_setup(), which is called by init_netconsole() before register_console(), so write_msg() cannot be triggered unless netpoll_setup() successfully set np.dev. Also np.dev cannot go away from under us, because netpoll_setup() grabs us reference on it. So let's remove the bogus check. Signed-off-by: Satyam Sharma Acked-by: Keiichi Kii Acked-by: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: David S. 
Miller commit cbf797329b709a02fff629c85c30dbc263c68f01 Author: Satyam Sharma Date: Fri Aug 10 15:27:24 2007 -0700 [NET] netconsole: Cleanups, codingstyle, prettyfication Based upon initial work by Keiichi Kii . (1) Remove unwanted headers. (2) Mark __init and __exit as appropriate. (3) Various trivial codingstyle and prettification stuff. Signed-off-by: Satyam Sharma Signed-off-by: Keiichi Kii Acked-by: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: David S. Miller commit 3520c136514861044cc8e115d133a262beab9899 Author: Stephen Hemminger Date: Fri Aug 10 15:22:58 2007 -0700 [IPV4] fib_trie: macro cleanup This patch converts the messy macro for MASK_PFX to inline function and expands TKEY_GET_MASK in the one place it is used. Signed-off-by: Andrew Morton Signed-off-by: David S. Miller commit 80188531cc0bd7a994e50d6d4a34956777284338 Author: Stephen Hemminger Date: Fri Aug 10 15:22:13 2007 -0700 [IPV4] fib_trie: cleanup Try this out: * replace macro's with inlines * get rid of places doing multiple evaluations of NODE_PARENT [akpm@linux-foundation.org: rcu_dereference wants an lval] Signed-off-by: Andrew Morton Signed-off-by: David S. Miller commit 755b5bcaefd6f87050542c5255c156a0c5d79db0 Author: Ilpo Järvinen Date: Fri Aug 10 14:31:21 2007 -0700 [TCP]: Update comment about highest_sack validity This stale info came from the original idea, which proved to be unnecessarily complex, sacked_out > 0 is easy to do and that when it's going to be needed anyway (it _can_ be valid also when sacked_out == 0 but there's not going to be a guarantee about it for now). Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller commit f58e01f353c48ca8bcbf051637a6892f69a8db5e Author: Ilpo Järvinen Date: Thu Aug 9 15:14:46 2007 +0300 [TCP]: Move sack_ok access to obviously named funcs & cleanup Previously code had IsReno/IsFack defined as macros that were local to tcp_input.c though sack_ok field has user elsewhere too for the same purpose. 
This changes them to static inlines as preferred according to the current coding style and unifies the access to sack_ok across multiple files. Magic bitops of sack_ok for FACK and DSACK are also abstracted to functions with appropriate names. Note: - One sack_ok = 1 remains but that's self-explanatory, i.e., it enables sack - Couple of !IsReno cases are changed to tcp_is_sack - There were no users for IsDSack => I dropped it Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller commit ccdb37a4e6a439f1a5531d67af0b5df3043037ca Author: Ilpo Järvinen Date: Fri Jul 27 16:36:17 2007 +0300 [TCP]: Don't panic if S+L skb is detected BUG_ON is an overkill. In fact, I was misled by BUG_TRAP severity (equals to WARN_ON) which is much lower than BUG_ON's (that panics). Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller commit 2ae3e691f332bd533d0d41ddd5f9a52be99ce332 Author: Ilpo Järvinen Date: Thu Aug 9 14:53:36 2007 +0300 [TCP]: Reduce sacked_out with reno when purging write_queue Previously TCP had a transitional state during which reno counted segments that are already below the current window into sacked_out, which is now prevented. In addition, re-try now the unconditional S+L skb catching. This approach conservatively calls just remove_sack and leaves reset_sack() calls alone. The best solution to the whole problem would be to first calculate the new sacked_out fully (this patch does not move reno_sack_reset calls from original sites and thus does not implement this). However, that would require very invasive change to fastretrans_alert (perhaps even slicing it to two halves). Alternatively, all callers of tcp_packets_in_flight (i.e., users that depend on sacked_out) should be postponed until the new sacked_out has been calculated but it isn't any simpler alternative. Signed-off-by: Ilpo Järvinen Signed-off-by: David S.
Miller commit f580e0dbac486789fb54ea6a2900ae301184c9e2 Author: Ilpo Järvinen Date: Sat Jul 7 13:39:02 2007 +0300 [TCP]: Keep state in Disorder also if only lost_out > 0 This happens rather infrequently and is only possible during FRTO. We must not allow TCP to slip to Open state because tcp_fastretrans_alert might then not be called at the right time when FRTO has exited. This became a problem when left_out got removed and was replaced by just sacked_out. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller commit b91b36be57abd5769b7a8532e8d882a635cb8de8 Author: Ilpo Järvinen Date: Thu Aug 9 14:45:17 2007 +0300 [TCP]: Restore over-zealous tcp_sync_left_out-like removals tcp_verify_left_out is useful for verifying S+L condition, so add it back to a couple of places where the code was not calling to tcp_sync_left_out but used own ad-hoc solution (before the tcp_sync_left_out got removed). Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller commit 6f69b1f6fa3d7b58d82e7db482049f6e90060e84 Author: Ilpo Järvinen Date: Thu Aug 9 14:44:16 2007 +0300 [TCP]: Left out sync->verify (the new meaning of it) & definify Left_out was dropped a while ago, thus leaving verifying consistency of the "left out" as only task for the function in question. Thus make its name more appropriate. In addition, it is intentionally converted to #define instead of static inline because the location of the invariant failure is the most important thing to have if this ever triggers. I think it would have been helpful e.g. in this case where the location of the failure point had to be based on some guesswork: http://lkml.org/lkml/2007/5/2/464 ...Luckily the guesswork seems to have proved to be correct. Signed-off-by: Ilpo Järvinen Signed-off-by: David S.
Miller commit 8899592e552aaea1c50eed7b96bd3920a84f6e1b Author: Ilpo Järvinen Date: Thu Aug 9 14:37:30 2007 +0300 [TCP]: Add tcp_left_out(tp) "back" to get cleaner looking lines tp->left_out got removed but nothing came to replace it back then (users just did addition by themselves), so add function for users now. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller commit 2a44368e150839a7097493a1a001ca21f7637ebe Author: Ilpo Järvinen Date: Thu Aug 9 14:33:18 2007 +0300 [TCP]: Tighten tcp_sock's belt, drop left_out It is easily calculable when needed and user are not that many after all. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller commit d17b4f46dc13674542605a24df171bea5de7ee7a Author: Ilpo Järvinen Date: Thu May 31 10:16:47 2007 +0300 [TCP]: Remove num_acked>0 checks from cong.ctrl mods pkts_acked There is no need for such check in pkts_acked because the callback is not invoked unless at least one segment got fully ACKed (i.e., the snd_una moved past skb's end_seq) by the cumulative ACK's snd_una advancement. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller commit 7e08a2b3b07fc8f83f38ef33f2a2620ead47c34d Author: Ilpo Järvinen Date: Fri Jun 15 12:58:38 2007 +0300 [TCP]: Add tcp_dec_pcount_approx int variant Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller commit 072b119091906bbdd79820bfe8cf450d268f64f1 Author: Ilpo Järvinen Date: Sun May 27 02:04:16 2007 -0700 [TCP]: Move code from tcp_ecn.h to tcp*.c and tcp.h & remove it No other users exist for tcp_ecn.h. Very few things remain in tcp.h, for most TCP ECN functions callers reside within a single .c file and can be placed there. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller commit 4fb92de7f2d846ff70cdd4031aab9a9061d85c17 Author: Ilpo Järvinen Date: Sun May 27 02:03:20 2007 -0700 [TCP]: Access to highest_sack obsoletes forward_cnt_hint In addition, added a reference about the purpose of the loop. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. 
Miller commit 9da726a8bad301cf94bab83afc31583288f453ce Author: Ilpo Järvinen Date: Sun May 27 01:53:49 2007 -0700 [TCP] FRTO: remove unnecessary fackets/sacked_out recounting F-RTO does not touch SACKED_ACKED bits at all, so there is no need to recount them in tcp_enter_frto_loss. After removal of the else branch, nested ifs can be combined. This must also reset sacked_out when SACK is not in use as TCP could have received some duplicate ACKs prior RTO. To achieve that in a sane manner, tcp_reset_reno_sack was re-placed by the previous patch. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller commit 4c94db6ed111c025f1df9509c6cb98b36d3b9bbe Author: Ilpo Järvinen Date: Sun May 27 01:52:00 2007 -0700 [TCP]: Move Reno SACKed_out counter functions earlier Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller commit 423bf503815f2e78b910b6c43f98eb59608d63f6 Author: David S. Miller Date: Mon Jun 18 22:43:06 2007 -0700 [TCP]: Extract DSACK detection code from tcp_sacktag_write_queue(). Signed-off-by: David S. Miller commit 53ccd3d15405260e34d8818afccfc0694632cc41 Author: Ilpo Järvinen Date: Wed Mar 28 12:06:37 2007 -0700 [TCP]: Rexmit hint must be cleared instead of setting it Stupid error from my side. Even though now that I noticed this, I hoped it would have been an optimization but no, the counter hint is then incorrect. Thus clearing is necessary for now (I still suspect though that this path is never executed). Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller commit 975318b00e3826210e6020ecc3e826b064b48e75 Author: Ilpo Järvinen Date: Fri Apr 20 22:56:38 2007 -0700 [TCP]: Extracted rexmit hint clearing from the LOST marking code Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller commit 80969ad493897dd938e57adeedc3edf4741d9fc4 Author: Ilpo Järvinen Date: Sat Mar 24 21:03:23 2007 -0700 [TCP]: Add highest_sack seqno, points to globally highest SACK It is guaranteed to be valid only when !tp->sacked_out. 
In most cases this seqno is available in the last ACK but there is no guarantee for that. The new fast recovery loss marking algorithm needs this as entry point. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller commit 2dd383332ca2c7af28b2fb9afebbc1ce74b1e172 Author: Thomas Graf Date: Wed Aug 8 23:12:56 2007 -0700 [NEIGH]: Netlink notifications Currently neighbour event notifications are limited to update notifications and only sent if the ARP daemon is enabled. This patch extends the existing notification code by also reporting neighbours being removed due to gc or administratively and removes the dependency on the ARP daemon. This allows to keep track of neighbour states without periodically fetching the complete neighbour table. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller commit 0c4b95716486ca7fc0e56dcd5dc519cff16f9b04 Author: Thomas Graf Date: Wed Aug 8 23:12:36 2007 -0700 [NEIGH]: Combine neighbour cleanup and release Introduces neigh_cleanup_and_release() to be used after a neighbour has been removed from its neighbour table. Serves as preparation to add event notifications. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller commit ffddae271fa326e113100a186425e572d7366466 Author: Andrew Gallatin Date: Wed Aug 22 13:45:46 2007 -0700 [MYRI10GE]: Use LRO. Singed off by: Andrew Gallatin Signed-off-by: David S. Miller commit 89182775c2cbb7db5c0325692a3e6e4df46b405f Author: Jan-Bernd Themann Date: Tue Aug 28 15:12:16 2007 -0700 [EHEA]: Use LRO. Signed-off-by: Jan-Bernd Themann Signed-off-by: David S. Miller commit 1bb11db5ef5930855745c995ab70d765ac4a20f6 Author: Jan-Bernd Themann Date: Wed Aug 8 22:38:05 2007 -0700 [NET]: Generic Large Receive Offload for TCP traffic This patch provides generic Large Receive Offload (LRO) functionality for IPv4/TCP traffic. LRO combines received tcp packets to a single larger tcp packet and passes them then to the network stack in order to increase performance (throughput). 
The interface supports two modes: Drivers can either pass SKBs or fragment lists to the LRO engine. Signed-off-by: Jan-Bernd Themann Signed-off-by: David S. Miller commit 4475388b3ddf6691bacd1df8586fd59ce990fd62 Author: Pavel Emelyanov Date: Wed Aug 8 22:23:11 2007 -0700 [NET]: Virtual ethernet device driver. Veth stands for Virtual ETHernet. It is a simple tunnel driver that works at the link layer and looks like a pair of ethernet devices interconnected with each other. Mainly it allows to communicate between network namespaces but it can be used as is as well. The newlink callback is organized that way to make it easy to create the peer device in the separate namespace when we have them in kernel. This implementation uses another interface - the RTM_NRELINK message introduced by Patrick. Signed-off-by: Pavel Emelyanov Acked-by: Patrick McHardy Signed-off-by: David S. Miller commit 27559b6cc843870b056d1394505b9c39c919d771 Author: Pavel Emelianov Date: Wed Aug 8 22:16:38 2007 -0700 [RTNETLINK]: Introduce generic rtnl_create_link(). This routine gets the parsed rtnl attributes and creates a new link with generic info (IFLA_LINKINFO policy). Its intention is to help the drivers, that need to create several links at once (like VETH). This is nothing but a copy-paste-ed part of rtnl_newlink() function that is responsible for creation of new device. Signed-off-by: Pavel Emelianov Acked-by: Patrick McHardy Signed-off-by: David S. Miller commit 3b3765d404fae8fc5c3c550884bc44aa3e3905e5 Author: Stephen Hemminger Date: Tue Aug 28 14:16:36 2007 -0700 [NET]: Make NAPI polling independent of struct net_device objects. Several devices have multiple independent RX queues per net device, and some have a single interrupt doorbell for several queues. In either case, it's easier to support layouts like that if the structure representing the poll is independent from the net device itself.
The signature of the ->poll() call back goes from: int foo_poll(struct net_device *dev, int *budget) to int foo_poll(struct napi_struct *napi, int budget) The caller is returned the number of RX packets processed (or the number of "NAPI credits" consumed if you want to get abstract). The callee no longer messes around bumping dev->quota, *budget, etc. because that is all handled in the caller upon return. The napi_struct is to be embedded in the device driver private data structures. Furthermore, it is the driver's responsibility to disable all NAPI instances in its ->stop() device close handler. Since the napi_struct is privatized into the driver's private data structures, only the driver knows how to get at all of the napi_struct instances it may have per-device. With lots of help and suggestions from Rusty Russell, Roland Dreier, Michael Chan, Jeff Garzik, and Jamal Hadi Salim. Bug fixes from Thomas Graf. [ Ported to current tree and all drivers converted. -DaveM ] Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller commit ce381671f8d64551b3ad55295a826e0981b853bf Author: Johannes Berg Date: Mon Aug 13 14:04:30 2007 +0200 [WIRELESS] radiotap parser: accept all other fields This makes the radiotap parser accept all other fields that are currently defined. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville commit 0e96ed04a987d509fdf67bb4a526f679b05b35d2 Author: Larry Finger Date: Fri Aug 10 11:23:20 2007 -0500 [MAC80211]: Add SIOCGIWTXPOWER routine The wireless extensions ioctl's implemented in mac80211 do not include SIOCGIWTXPOWER. This patch adds the necessary code. Acked-by: Michael Buesch Signed-off-by: Larry Finger Signed-off-by: John W. Linville commit f8c085ec1e498240e7a3b2a90fafb47ba426490f Author: Thomas Graf Date: Sun Jul 22 12:43:32 2007 +0200 [WIRELESS]: Use type safe netlink interface Makes use of the type safe netlink interface and adds a warning if the message is too big for NLMSG_DEFAULT_SIZE to help debug.
Signed-off-by: Thomas Graf Signed-off-by: John W. Linville commit 3248603048d689bb17da4ae104fe8f72a8948236 Author: Johannes Berg Date: Fri Jul 27 15:43:24 2007 +0200 [MAC80211]: fix add_interface monitor mode behaviour This makes it behave the same whether we have monitor during operation or not. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville commit 5c3b6ffe439dcfc0ad0ea37f2d46d0f67bd28416 Author: Andy Green Date: Fri Jul 27 15:43:24 2007 +0200 [MAC80211]: Improve sanity checks on injected packets Michael Wu noticed that the skb length checking is not taken care of enough when a packet is presented on the Monitor interface for injection. This patch improves the sanity checking and removes fake offsets placed into the skb network and transport header. Signed-off-by: Andy Green Signed-off-by: Jiri Benc Signed-off-by: John W. Linville commit 1f4cda711ccc12b2353dbc4231cc612653cf9f18 Author: Andy Green Date: Fri Jul 27 15:43:24 2007 +0200 [MAC80211]: Add get_unaligned to ieee80211_get_radiotap_len ieee80211_get_radiotap_len() tries to dereference radiotap length without taking care that it is completely unaligned and get_unaligned() is required. Signed-off-by: Andy Green Signed-off-by: Jiri Benc Signed-off-by: John W. Linville commit 32ffc46f6ec38187c03243f37b6f5a48919f6d1c Author: Michael Wu Date: Fri Jul 27 15:43:24 2007 +0200 [MAC80211]: use more GFP_KERNEL instead of GFP_ATOMIC This patch replaces atomic allocations with regular ones where possible. Merged with "revert some GFP_ATOMIC -> GFP_KERNEL changes" from Michael Wu: > Some of the allocations made with GFP_ATOMIC really were necessary. Signed-off-by: Michael Wu Signed-off-by: Jiri Benc Signed-off-by: John W. 
Linville commit 6851b21668f772c6781231742f3d4002b178a20d Author: Daniel Drake Date: Fri Jul 27 15:43:24 2007 +0200 [MAC80211]: implement ERP info change notifications zd1211rw and bcm43xx are interested in being notified when ERP IE conditions change, so that they can reprogram a register which affects how control frames are transmitted. This patch adds an interface similar to the one that can be found in softmac. Signed-off-by: Daniel Drake Signed-off-by: Jiri Benc Signed-off-by: John W. Linville commit 59a80437c116845f838836947ec54312cf121d40 Author: Daniel Drake Date: Fri Jul 27 15:43:24 2007 +0200 [MAC80211]: improved short preamble handling Similarly to CTS protection, whether short preambles are used for 802.11b transmissions should be a per-subif setting, not device global. For STAs, this patch makes short preamble handling automatic based on the ERP IE. For APs, hostapd still uses the prism ioctls, but the write ioctl has been restricted to AP-only subifs. ieee80211_txrx_data.short_preamble (an unused field) was removed. Unfortunately, some API changes were required for the following functions: - ieee80211_generic_frame_duration - ieee80211_rts_duration - ieee80211_ctstoself_duration - ieee80211_rts_get - ieee80211_ctstoself_get Affected drivers were updated accordingly. Signed-off-by: Daniel Drake Signed-off-by: Jiri Benc Signed-off-by: John W. Linville commit 534b90e7962ef0fc58e459e8f653697b7bbc8e3a Author: Ivo van Doorn Date: Fri Jul 27 15:43:23 2007 +0200 [MAC80211]: Add LONG_RETRY flag to ieee80211_tx_control mac80211 informs the driver what the short and long retry values are through set_retry_limit(), but when packets are being transmitted it did not inform the driver which of the 2 retry limits should actually be used. Instead it sends the actual value, but for drivers that can only set the retry limit and the register and in the descriptor need to indicate which of the limits should be used this is not really useful. 
This patch will add an IEEE80211_TXCTL_LONG_RETRY_LIMIT flag to the ieee80211_tx_control structure. By default the short retry limit should be used but if the flag is set the long retry should be used. This does not prevent the driver from ignoring the request for "no retry" packets, but at least those will be sent out with the short retry limit. But there is no perfect cure for this problem.. :( Signed-off-by: Ivo van Doorn Signed-off-by: Jiri Benc Signed-off-by: John W. Linville commit aaeae9461e6bb6e9b3728973fe62ee9850ad2895 Author: Daniel Drake Date: Fri Jul 27 15:43:23 2007 +0200 [MAC80211]: STA reassociation improvements My cheapy D-Link AP behaves strangely w.r.t. reassociations. The following sequence of commands causes me to lose association and to be unable to regain it: ifconfig eth8 down ifconfig eth8 up iwconfig eth8 essid This is because mac80211 tries to reassociate, rather than just associate. My AP replies with an association response (not a reassociation response...) denying the association with code 12: "Association denied due to reason outside the scope of this standard" mac80211 tries this reassociation another 4 times or so before finally giving up. I see 2 problems here: 1. bringing the interface down and up again should be resetting interface state i.e. after the interface is brought down, it should have no memory of if or where it was previously associated 2. after the first reassociation fails, mac80211 should fall back to standard association for the next attempt Signed-off-by: Daniel Drake Signed-off-by: Jiri Benc Signed-off-by: John W. Linville commit 15dca83fd6ca5bc6466e84b3786dfa925cbe2ad3 Author: Michael Wu Date: Fri Jul 27 15:43:23 2007 +0200 [MAC80211]: improve locking of sta_info related structures The sta_info code has some awkward locking which prevents some driver callbacks from being allowed to sleep. This patch makes the locking more focused so code that calls driver callbacks is allowed to sleep.
It also converts sta_lock to a rwlock. Signed-off-by: Michael Wu Signed-off-by: Jiri Benc Signed-off-by: John W. Linville commit a352803089f8fd43bf7512739c008850025a67ab Author: Johannes Berg Date: Fri Jul 27 15:43:23 2007 +0200 [MAC80211]: introduce util.c Introduce a new file util.c and move a whole bunch of functions into it. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville commit 47c813a1443a17ae5d303537196cde2cdebfae61 Author: Johannes Berg Date: Fri Jul 27 15:43:23 2007 +0200 [MAC80211]: reorder interface related functions This patch groups a whole bunch of functions together to make ieee80211.c more maintainable. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville commit e4dc210beecfaf587c6766c4810af4078b7a3971 Author: Johannes Berg Date: Fri Jul 27 15:43:23 2007 +0200 [MAC80211]: move some rate control functions out of ieee80211.c I think these can go with rate control just as well and it makes ieee80211.c more readable. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville commit f5e8dac1dcfb549431958cadec3a8061034842b3 Author: Johannes Berg Date: Fri Jul 27 15:43:23 2007 +0200 [MAC80211]: split out some key functions from ieee80211.c into a new file key.c which doesn't have much code right now but it makes ieee80211.c easier to read. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville commit 1d5ff81a9c0580770c1606959b57003eff439f95 Author: Johannes Berg Date: Fri Jul 27 15:43:22 2007 +0200 [MAC80211]: remove some unnecessary includes Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville commit 00f71aee782713a42369e3404408ceadc5d0bacf Author: Johannes Berg Date: Fri Jul 27 15:43:22 2007 +0200 [MAC80211]: split TX path into own file Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. 
Linville commit d8ee25440ca7e326e1d98c27ac51baa147520bd9 Author: Johannes Berg Date: Fri Jul 27 15:43:22 2007 +0200 [MAC80211]: rx cleanups (2) Some more outdenting to make the code more readable. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville commit 4c5c9da9276b41c11a59b70d08fa4826c3862ed3 Author: Johannes Berg Date: Fri Jul 27 15:43:22 2007 +0200 [MAC80211]: fix bug for per-sta stats pre_rx handlers can't really touch sta since for IBSS it might not be assigned yet, it can create sta info structs on-the-fly. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville commit dad0ee939aa0cc3c750cfde8e95cca3932154955 Author: Johannes Berg Date: Fri Jul 27 15:43:22 2007 +0200 [MAC80211]: split up __ieee80211_rx The really indented part that does the huge switch on the interface type is a nuisance. Put it into an own function 'prepare_for_handlers'. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville commit 8982787e9eed8118685364714ce4b886ce70e3a5 Author: Johannes Berg Date: Fri Jul 27 15:43:22 2007 +0200 [MAC80211]: split ieee80211_rx_h_check handler The ieee80211_rx_h_check handler really does two things, it's a lot easier to understand if it's split into ieee80211_rx_h_check and ieee80211_rx_h_load_key, and it may be possible in the future to optimise the key loading to not do it for each interface. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville commit 78f53226f82d18eaa4cc8f858196701933659c79 Author: Johannes Berg Date: Fri Jul 27 15:43:22 2007 +0200 [MAC80211]: rx cleanups (1) Make some really indented code more readable by outdenting. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. 
Linville commit 219d8bc32dfd53556e706a27faaf0f95d4045f80 Author: Johannes Berg Date: Fri Jul 27 15:43:22 2007 +0200 [MAC80211]: move QoS rx handlers into rx.c This patch moves the QoS handlers into rx.c making it possible to compile wme.c only when NET_SCHED is defined. Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville commit 4d7011e9858253bfac0857181f24f1dd8d9557ce Author: Johannes Berg Date: Fri Jul 27 15:43:22 2007 +0200 [MAC80211]: split RX handlers into own file Signed-off-by: Johannes Berg Signed-off-by: Jiri Benc Signed-off-by: John W. Linville Signed-off-by: Andrew Morton --- Documentation/DocBook/kernel-api.tmpl | 8 Documentation/feature-removal-schedule.txt | 9 Documentation/networking/NAPI_HOWTO.txt | 766 -- Documentation/networking/netconsole.txt | 99 Documentation/networking/netdevices.txt | 12 drivers/infiniband/ulp/ipoib/ipoib.h | 4 drivers/infiniband/ulp/ipoib/ipoib_ib.c | 43 drivers/infiniband/ulp/ipoib/ipoib_main.c | 11 drivers/net/8139cp.c | 41 drivers/net/8139too.c | 48 drivers/net/Kconfig | 17 drivers/net/Makefile | 1 drivers/net/amd8111e.c | 30 drivers/net/amd8111e.h | 2 drivers/net/arm/ep93xx_eth.c | 72 drivers/net/b44.c | 48 drivers/net/b44.h | 2 drivers/net/bnx2.c | 45 drivers/net/bnx2.h | 2 drivers/net/cassini.c | 40 drivers/net/cassini.h | 2 drivers/net/chelsio/common.h | 1 drivers/net/chelsio/cxgb2.c | 9 drivers/net/chelsio/sge.c | 29 drivers/net/chelsio/sge.h | 2 drivers/net/cxgb3/adapter.h | 28 drivers/net/cxgb3/cxgb3_main.c | 194 drivers/net/cxgb3/sge.c | 162 drivers/net/e100.c | 37 drivers/net/e1000/e1000.h | 1 drivers/net/e1000/e1000_main.c | 45 drivers/net/ehea/ehea.h | 11 drivers/net/ehea/ehea_ethtool.c | 15 drivers/net/ehea/ehea_main.c | 190 drivers/net/epic100.c | 36 drivers/net/fec_8xx/fec_8xx.h | 2 drivers/net/fec_8xx/fec_main.c | 59 drivers/net/forcedeth.c | 69 drivers/net/fs_enet/fs_enet-main.c | 55 drivers/net/fs_enet/fs_enet.h | 1 drivers/net/gianfar.c | 47 drivers/net/gianfar.h | 3 
drivers/net/ibmveth.c | 123 drivers/net/ibmveth.h | 1 drivers/net/irda/Kconfig | 26 drivers/net/irda/Makefile | 2 drivers/net/irda/ks959-sir.c | 939 +++ drivers/net/irda/ksdazzle-sir.c | 823 +++ drivers/net/ixgb/ixgb.h | 1 drivers/net/ixgb/ixgb_main.c | 29 drivers/net/ixp2000/ixpdev.c | 39 drivers/net/ixp2000/ixpdev.h | 2 drivers/net/macb.c | 38 drivers/net/macb.h | 1 drivers/net/mv643xx_eth.c | 48 drivers/net/mv643xx_eth.h | 2 drivers/net/myri10ge/myri10ge.c | 149 drivers/net/natsemi.c | 42 drivers/net/netconsole.c | 777 ++- drivers/net/netxen/netxen_nic.h | 1 drivers/net/netxen/netxen_nic_main.c | 39 drivers/net/pasemi_mac.c | 36 drivers/net/pasemi_mac.h | 1 drivers/net/pcnet32.c | 82 drivers/net/ps3_gelic_net.c | 46 drivers/net/ps3_gelic_net.h | 1 drivers/net/qla3xxx.c | 29 drivers/net/qla3xxx.h | 2 drivers/net/r8169.c | 58 drivers/net/s2io.c | 35 drivers/net/s2io.h | 3 drivers/net/sb1250-mac.c | 47 drivers/net/sis190.c | 19 drivers/net/skge.c | 44 drivers/net/skge.h | 1 drivers/net/sky2.c | 89 drivers/net/sky2.h | 1 drivers/net/spider_net.c | 58 drivers/net/spider_net.h | 2 drivers/net/starfire.c | 51 drivers/net/sungem.c | 52 drivers/net/sungem.h | 1 drivers/net/tc35815.c | 49 drivers/net/tg3.c | 61 drivers/net/tg3.h | 1 drivers/net/tsi108_eth.c | 40 drivers/net/tulip/interrupt.c | 54 drivers/net/tulip/tulip.h | 3 drivers/net/tulip/tulip_core.c | 11 drivers/net/typhoon.c | 47 drivers/net/ucc_geth.c | 64 drivers/net/ucc_geth.h | 1 drivers/net/veth.c | 474 + drivers/net/via-rhine.c | 42 drivers/net/wireless/rtl8187.h | 1 drivers/net/wireless/rtl8187_dev.c | 7 drivers/net/xen-netfront.c | 33 include/linux/dccp.h | 4 include/linux/ethtool.h | 36 include/linux/inet_lro.h | 177 include/linux/ktime.h | 21 include/linux/netdevice.h | 361 + include/linux/netpoll.h | 56 include/linux/snmp.h | 3 include/linux/tcp.h | 5 include/net/ieee80211_radiotap.h | 10 include/net/ip6_fib.h | 2 include/net/ipv6.h | 6 include/net/mac80211.h | 213 include/net/rtnetlink.h | 4 
include/net/sctp/sctp.h | 5 include/net/sock.h | 3 include/net/tcp.h | 80 include/net/tcp_ecn.h | 130 include/net/veth.h | 12 kernel/hrtimer.c | 24 kernel/softirq.c | 2 net/atm/br2684.c | 28 net/atm/clip.c | 62 net/atm/common.c | 23 net/atm/lec.c | 104 net/atm/pppoatm.c | 23 net/atm/raw.c | 12 net/atm/signaling.c | 25 net/atm/svc.c | 25 net/bridge/br_stp_bpdu.c | 2 net/core/dev.c | 178 net/core/ethtool.c | 339 - net/core/neighbour.c | 60 net/core/net-sysfs.c | 15 net/core/netpoll.c | 87 net/core/pktgen.c | 94 net/core/rtnetlink.c | 85 net/dccp/ackvec.c | 16 net/dccp/ackvec.h | 4 net/dccp/ccids/ccid3.c | 71 net/dccp/ccids/ccid3.h | 7 net/dccp/ccids/lib/loss_interval.c | 11 net/dccp/ccids/lib/loss_interval.h | 4 net/dccp/ccids/lib/packet_history.h | 9 net/dccp/dccp.h | 46 net/dccp/input.c | 23 net/dccp/minisocks.c | 1 net/dccp/options.c | 35 net/dccp/proto.c | 1 net/ethernet/eth.c | 16 net/ieee80211/ieee80211_crypt_ccmp.c | 5 net/ipv4/Kconfig | 8 net/ipv4/Makefile | 1 net/ipv4/fib_trie.c | 86 net/ipv4/inet_diag.c | 5 net/ipv4/inet_lro.c | 600 ++ net/ipv4/inet_timewait_sock.c | 4 net/ipv4/ip_forward.c | 2 net/ipv4/proc.c | 3 net/ipv4/tcp.c | 5 net/ipv4/tcp_bic.c | 2 net/ipv4/tcp_cubic.c | 2 net/ipv4/tcp_input.c | 473 + net/ipv4/tcp_minisocks.c | 9 net/ipv4/tcp_output.c | 125 net/ipv4/tcp_timer.c | 2 net/ipv4/udp.c | 82 net/ipv4/udp_impl.h | 2 net/ipv4/udplite.c | 3 net/ipv6/inet6_connection_sock.c | 34 net/ipv6/ip6_output.c | 4 net/ipv6/ipv6_sockglue.c | 2 net/ipv6/tcp_ipv6.c | 4 net/ipv6/udp.c | 2 net/irda/af_irda.c | 21 net/key/af_key.c | 17 net/mac80211/Makefile | 7 net/mac80211/aes_ccm.c | 5 net/mac80211/debugfs.c | 9 net/mac80211/debugfs_key.c | 65 net/mac80211/debugfs_netdev.c | 14 net/mac80211/debugfs_sta.c | 1 net/mac80211/event.c | 42 net/mac80211/hostapd_ioctl.h | 38 net/mac80211/ieee80211.c | 4867 ++----------------- net/mac80211/ieee80211_common.h | 4 net/mac80211/ieee80211_i.h | 158 net/mac80211/ieee80211_iface.c | 35 net/mac80211/ieee80211_ioctl.c | 481 
- net/mac80211/ieee80211_key.h | 49 net/mac80211/ieee80211_rate.c | 42 net/mac80211/ieee80211_rate.h | 6 net/mac80211/ieee80211_sta.c | 186 net/mac80211/key.c | 275 + net/mac80211/rc80211_simple.c | 8 net/mac80211/rx.c | 1489 +++++ net/mac80211/sta_info.c | 205 net/mac80211/sta_info.h | 24 net/mac80211/tkip.c | 18 net/mac80211/tx.c | 1896 +++++++ net/mac80211/util.c | 488 + net/mac80211/wep.c | 18 net/mac80211/wme.c | 76 net/mac80211/wme.h | 9 net/mac80211/wpa.c | 107 net/netlink/af_netlink.c | 2 net/sched/sch_generic.c | 12 net/sctp/endpointola.c | 1 net/sctp/ipv6.c | 4 net/sctp/protocol.c | 32 net/sctp/sm_statefuns.c | 74 net/sctp/socket.c | 69 net/sctp/sysctl.c | 33 net/sctp/ulpevent.c | 18 net/sctp/ulpqueue.c | 1 net/sunrpc/svcsock.c | 3 net/wireless/radiotap.c | 6 net/wireless/wext.c | 30 net/xfrm/xfrm_policy.c | 2 net/xfrm/xfrm_user.c | 1041 +--- 216 files changed, 13342 insertions(+), 9597 deletions(-) diff -puN Documentation/DocBook/kernel-api.tmpl~git-net Documentation/DocBook/kernel-api.tmpl --- a/Documentation/DocBook/kernel-api.tmpl~git-net +++ a/Documentation/DocBook/kernel-api.tmpl @@ -240,17 +240,23 @@ X!Ilib/string.c Driver Support !Enet/core/dev.c !Enet/ethernet/eth.c +!Enet/sched/sch_generic.c !Iinclude/linux/etherdevice.h +!Iinclude/linux/netdevice.h + + PHY Support !Edrivers/net/phy/phy.c !Idrivers/net/phy/phy.c !Edrivers/net/phy/phy_device.c !Idrivers/net/phy/phy_device.c !Edrivers/net/phy/mdio_bus.c !Idrivers/net/phy/mdio_bus.c + +--> Synchronous PPP !Edrivers/net/wan/syncppp.c diff -puN Documentation/feature-removal-schedule.txt~git-net Documentation/feature-removal-schedule.txt --- a/Documentation/feature-removal-schedule.txt~git-net +++ a/Documentation/feature-removal-schedule.txt @@ -298,3 +298,12 @@ Why: All mthca hardware also supports MS Who: Roland Dreier --------------------------- + +What: shaper network driver +When: January 2008 +Files: drivers/net/shaper.c, include/linux/if_shaper.h +Why: This driver has been marked obsolete for 
many years. + It was only designed to work on lower speed links and has design + flaws that lead to machine crashes. The qdisc infrastructure in + 2.4 or later kernels, provides richer features and is more robust. +Who: Stephen Hemminger diff -puN Documentation/networking/NAPI_HOWTO.txt~git-net /dev/null --- a/Documentation/networking/NAPI_HOWTO.txt +++ /dev/null @@ -1,766 +0,0 @@ -HISTORY: -February 16/2002 -- revision 0.2.1: -COR typo corrected -February 10/2002 -- revision 0.2: -some spell checking ;-> -January 12/2002 -- revision 0.1 -This is still work in progress so may change. -To keep up to date please watch this space. - -Introduction to NAPI -==================== - -NAPI is a proven (www.cyberus.ca/~hadi/usenix-paper.tgz) technique -to improve network performance on Linux. For more details please -read that paper. -NAPI provides a "inherent mitigation" which is bound by system capacity -as can be seen from the following data collected by Robert on Gigabit -ethernet (e1000): - - Psize Ipps Tput Rxint Txint Done Ndone - --------------------------------------------------------------- - 60 890000 409362 17 27622 7 6823 - 128 758150 464364 21 9301 10 7738 - 256 445632 774646 42 15507 21 12906 - 512 232666 994445 241292 19147 241192 1062 - 1024 119061 1000003 872519 19258 872511 0 - 1440 85193 1000003 946576 19505 946569 0 - - -Legend: -"Ipps" stands for input packets per second. -"Tput" == packets out of total 1M that made it out. -"txint" == transmit completion interrupts seen -"Done" == The number of times that the poll() managed to pull all -packets out of the rx ring. Note from this that the lower the -load the more we could clean up the rxring -"Ndone" == is the converse of "Done". Note again, that the higher -the load the more times we couldn't clean up the rxring. - -Observe that: -when the NIC receives 890Kpackets/sec only 17 rx interrupts are generated. -The system cant handle the processing at 1 interrupt/packet at that load level. 
-At lower rates on the other hand, rx interrupts go up and therefore the -interrupt/packet ratio goes up (as observable from that table). So there is -possibility that under low enough input, you get one poll call for each -input packet caused by a single interrupt each time. And if the system -cant handle interrupt per packet ratio of 1, then it will just have to -chug along .... - - -0) Prerequisites: -================== -A driver MAY continue using the old 2.4 technique for interfacing -to the network stack and not benefit from the NAPI changes. -NAPI additions to the kernel do not break backward compatibility. -NAPI, however, requires the following features to be available: - -A) DMA ring or enough RAM to store packets in software devices. - -B) Ability to turn off interrupts or maybe events that send packets up -the stack. - -NAPI processes packet events in what is known as dev->poll() method. -Typically, only packet receive events are processed in dev->poll(). -The rest of the events MAY be processed by the regular interrupt handler -to reduce processing latency (justified also because there are not that -many of them). -Note, however, NAPI does not enforce that dev->poll() only processes -receive events. -Tests with the tulip driver indicated slightly increased latency if -all of the interrupt handler is moved to dev->poll(). Also MII handling -gets a little trickier. -The example used in this document is to move the receive processing only -to dev->poll(); this is shown with the patch for the tulip driver. -For an example of code that moves all the interrupt driver to -dev->poll() look at the ported e1000 code. - -There are caveats that might force you to go with moving everything to -dev->poll(). Different NICs work differently depending on their status/event -acknowledgement setup. -There are two types of event register ACK mechanisms. - I) what is known as Clear-on-read (COR). - when you read the status/event register, it clears everything! 
- The natsemi and sunbmac NICs are known to do this. - In this case your only choice is to move all to dev->poll() - - II) Clear-on-write (COW) - i) you clear the status by writing a 1 in the bit-location you want. - These are the majority of the NICs and work the best with NAPI. - Put only receive events in dev->poll(); leave the rest in - the old interrupt handler. - ii) whatever you write in the status register clears every thing ;-> - Cant seem to find any supported by Linux which do this. If - someone knows such a chip email us please. - Move all to dev->poll() - -C) Ability to detect new work correctly. -NAPI works by shutting down event interrupts when there's work and -turning them on when there's none. -New packets might show up in the small window while interrupts were being -re-enabled (refer to appendix 2). A packet might sneak in during the period -we are enabling interrupts. We only get to know about such a packet when the -next new packet arrives and generates an interrupt. -Essentially, there is a small window of opportunity for a race condition -which for clarity we'll refer to as the "rotting packet". - -This is a very important topic and appendix 2 is dedicated for more -discussion. - -Locking rules and environmental guarantees -========================================== - --Guarantee: Only one CPU at any time can call dev->poll(); this is because -only one CPU can pick the initial interrupt and hence the initial -netif_rx_schedule(dev); -- The core layer invokes devices to send packets in a round robin format. -This implies receive is totally lockless because of the guarantee that only -one CPU is executing it. -- contention can only be the result of some other CPU accessing the rx -ring. This happens only in close() and suspend() (when these methods -try to clean the rx ring); -****guarantee: driver authors need not worry about this; synchronization -is taken care for them by the top net layer. 
--local interrupts are enabled (if you dont move all to dev->poll()). For -example link/MII and txcomplete continue functioning just same old way. -This improves the latency of processing these events. It is also assumed that -the receive interrupt is the largest cause of noise. Note this might not -always be true. -[according to Manfred Spraul, the winbond insists on sending one -txmitcomplete interrupt for each packet (although this can be mitigated)]. -For these broken drivers, move all to dev->poll(). - -For the rest of this text, we'll assume that dev->poll() only -processes receive events. - -new methods introduce by NAPI -============================= - -a) netif_rx_schedule(dev) -Called by an IRQ handler to schedule a poll for device - -b) netif_rx_schedule_prep(dev) -puts the device in a state which allows for it to be added to the -CPU polling list if it is up and running. You can look at this as -the first half of netif_rx_schedule(dev) above; the second half -being c) below. - -c) __netif_rx_schedule(dev) -Add device to the poll list for this CPU; assuming that _prep above -has already been called and returned 1. - -d) netif_rx_reschedule(dev, undo) -Called to reschedule polling for device specifically for some -deficient hardware. Read Appendix 2 for more details. - -e) netif_rx_complete(dev) - -Remove interface from the CPU poll list: it must be in the poll list -on current cpu. This primitive is called by dev->poll(), when -it completes its work. The device cannot be out of poll list at this -call, if it is then clearly it is a BUG(). You'll know ;-> - -All of the above methods are used below, so keep reading for clarity. - -Device driver changes to be made when porting NAPI -================================================== - -Below we describe what kind of changes are required for NAPI to work. 
- -1) introduction of dev->poll() method -===================================== - -This is the method that is invoked by the network core when it requests -for new packets from the driver. A driver is allowed to send upto -dev->quota packets by the current CPU before yielding to the network -subsystem (so other devices can also get opportunity to send to the stack). - -dev->poll() prototype looks as follows: -int my_poll(struct net_device *dev, int *budget) - -budget is the remaining number of packets the network subsystem on the -current CPU can send up the stack before yielding to other system tasks. -*Each driver is responsible for decrementing budget by the total number of -packets sent. - Total number of packets cannot exceed dev->quota. - -dev->poll() method is invoked by the top layer, the driver just sends if it -can to the stack the packet quantity requested. - -more on dev->poll() below after the interrupt changes are explained. - -2) registering dev->poll() method -=================================== - -dev->poll should be set in the dev->probe() method. -e.g: -dev->open = my_open; -. -. -/* two new additions */ -/* first register my poll method */ -dev->poll = my_poll; -/* next register my weight/quanta; can be overridden in /proc */ -dev->weight = 16; -. -. -dev->stop = my_close; - - - -3) scheduling dev->poll() -============================= -This involves modifying the interrupt handler and the code -path which takes the packet off the NIC and sends them to the -stack. 
- -it's important at this point to introduce the classical D Becker -interrupt processor: - ------------------- -static irqreturn_t -netdevice_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - - struct net_device *dev = (struct net_device *)dev_instance; - struct my_private *tp = (struct my_private *)dev->priv; - - int work_count = my_work_count; - status = read_interrupt_status_reg(); - if (status == 0) - return IRQ_NONE; /* Shared IRQ: not us */ - if (status == 0xffff) - return IRQ_HANDLED; /* Hot unplug */ - if (status & error) - do_some_error_handling() - - do { - acknowledge_ints_ASAP(); - - if (status & link_interrupt) { - spin_lock(&tp->link_lock); - do_some_link_stat_stuff(); - spin_lock(&tp->link_lock); - } - - if (status & rx_interrupt) { - receive_packets(dev); - } - - if (status & rx_nobufs) { - make_rx_buffs_avail(); - } - - if (status & tx_related) { - spin_lock(&tp->lock); - tx_ring_free(dev); - if (tx_died) - restart_tx(); - spin_unlock(&tp->lock); - } - - status = read_interrupt_status_reg(); - - } while (!(status & error) || more_work_to_be_done); - return IRQ_HANDLED; -} - ----------------------------------------------------------------------- - -We now change this to what is shown below to NAPI-enable it: - ----------------------------------------------------------------------- -static irqreturn_t -netdevice_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - struct net_device *dev = (struct net_device *)dev_instance; - struct my_private *tp = (struct my_private *)dev->priv; - - status = read_interrupt_status_reg(); - if (status == 0) - return IRQ_NONE; /* Shared IRQ: not us */ - if (status == 0xffff) - return IRQ_HANDLED; /* Hot unplug */ - if (status & error) - do_some_error_handling(); - - do { -/************************ start note *********************************/ - acknowledge_ints_ASAP(); // dont ack rx and rxnobuff here -/************************ end note *********************************/ - - if (status & 
link_interrupt) { - spin_lock(&tp->link_lock); - do_some_link_stat_stuff(); - spin_unlock(&tp->link_lock); - } -/************************ start note *********************************/ - if (status & rx_interrupt || (status & rx_nobuffs)) { - if (netif_rx_schedule_prep(dev)) { - - /* disable interrupts caused - * by arriving packets */ - disable_rx_and_rxnobuff_ints(); - /* tell system we have work to be done. */ - __netif_rx_schedule(dev); - } else { - printk("driver bug! interrupt while in poll\n"); - /* FIX by disabling interrupts */ - disable_rx_and_rxnobuff_ints(); - } - } -/************************ end note note *********************************/ - - if (status & tx_related) { - spin_lock(&tp->lock); - tx_ring_free(dev); - - if (tx_died) - restart_tx(); - spin_unlock(&tp->lock); - } - - status = read_interrupt_status_reg(); - -/************************ start note *********************************/ - } while (!(status & error) || more_work_to_be_done(status)); -/************************ end note note *********************************/ - return IRQ_HANDLED; -} - ---------------------------------------------------------------------- - - -We note several things from above: - -I) Any interrupt source which is caused by arriving packets is now -turned off when it occurs. Depending on the hardware, there could be -several reasons that arriving packets would cause interrupts; these are the -interrupt sources we wish to avoid. The two common ones are a) a packet -arriving (rxint) b) a packet arriving and finding no DMA buffers available -(rxnobuff) . -This means also acknowledge_ints_ASAP() will not clear the status -register for those two items above; clearing is done in the place where -proper work is done within NAPI; at the poll() and refill_rx_ring() -discussed further below. -netif_rx_schedule_prep() returns 1 if device is in running state and -gets successfully added to the core poll list. 
If we get a zero value -we can _almost_ assume are already added to the list (instead of not running. -Logic based on the fact that you shouldn't get interrupt if not running) -We rectify this by disabling rx and rxnobuf interrupts. - -II) that receive_packets(dev) and make_rx_buffs_avail() may have disappeared. -These functionalities are still around actually...... - -infact, receive_packets(dev) is very close to my_poll() and -make_rx_buffs_avail() is invoked from my_poll() - -4) converting receive_packets() to dev->poll() -=============================================== - -We need to convert the classical D Becker receive_packets(dev) to my_poll() - -First the typical receive_packets() below: -------------------------------------------------------------------- - -/* this is called by interrupt handler */ -static void receive_packets (struct net_device *dev) -{ - - struct my_private *tp = (struct my_private *)dev->priv; - rx_ring = tp->rx_ring; - cur_rx = tp->cur_rx; - int entry = cur_rx % RX_RING_SIZE; - int received = 0; - int rx_work_limit = tp->dirty_rx + RX_RING_SIZE - tp->cur_rx; - - while (rx_ring_not_empty) { - u32 rx_status; - unsigned int rx_size; - unsigned int pkt_size; - struct sk_buff *skb; - /* read size+status of next frame from DMA ring buffer */ - /* the number 16 and 4 are just examples */ - rx_status = le32_to_cpu (*(u32 *) (rx_ring + ring_offset)); - rx_size = rx_status >> 16; - pkt_size = rx_size - 4; - - /* process errors */ - if ((rx_size > (MAX_ETH_FRAME_SIZE+4)) || - (!(rx_status & RxStatusOK))) { - netdrv_rx_err (rx_status, dev, tp, ioaddr); - return; - } - - if (--rx_work_limit < 0) - break; - - /* grab a skb */ - skb = dev_alloc_skb (pkt_size + 2); - if (skb) { - . - . - netif_rx (skb); - . - . - } else { /* OOM */ - /*seems very driver specific ... some just pass - whatever is on the ring already. 
*/ - } - - /* move to the next skb on the ring */ - entry = (++tp->cur_rx) % RX_RING_SIZE; - received++ ; - - } - - /* store current ring pointer state */ - tp->cur_rx = cur_rx; - - /* Refill the Rx ring buffers if they are needed */ - refill_rx_ring(); - . - . - -} -------------------------------------------------------------------- -We change it to a new one below; note the additional parameter in -the call. - -------------------------------------------------------------------- - -/* this is called by the network core */ -static int my_poll (struct net_device *dev, int *budget) -{ - - struct my_private *tp = (struct my_private *)dev->priv; - rx_ring = tp->rx_ring; - cur_rx = tp->cur_rx; - int entry = cur_rx % RX_BUF_LEN; - /* maximum packets to send to the stack */ -/************************ note note *********************************/ - int rx_work_limit = dev->quota; - -/************************ end note note *********************************/ - do { // outer beginning loop starts here - - clear_rx_status_register_bit(); - - while (rx_ring_not_empty) { - u32 rx_status; - unsigned int rx_size; - unsigned int pkt_size; - struct sk_buff *skb; - /* read size+status of next frame from DMA ring buffer */ - /* the number 16 and 4 are just examples */ - rx_status = le32_to_cpu (*(u32 *) (rx_ring + ring_offset)); - rx_size = rx_status >> 16; - pkt_size = rx_size - 4; - - /* process errors */ - if ((rx_size > (MAX_ETH_FRAME_SIZE+4)) || - (!(rx_status & RxStatusOK))) { - netdrv_rx_err (rx_status, dev, tp, ioaddr); - return 1; - } - -/************************ note note *********************************/ - if (--rx_work_limit < 0) { /* we got packets, but no quota */ - /* store current ring pointer state */ - tp->cur_rx = cur_rx; - - /* Refill the Rx ring buffers if they are needed */ - refill_rx_ring(dev); - goto not_done; - } -/********************** end note **********************************/ - - /* grab a skb */ - skb = dev_alloc_skb (pkt_size + 2); - if (skb) { - . 
- . -/************************ note note *********************************/ - netif_receive_skb (skb); -/********************** end note **********************************/ - . - . - } else { /* OOM */ - /*seems very driver specific ... common is just pass - whatever is on the ring already. */ - } - - /* move to the next skb on the ring */ - entry = (++tp->cur_rx) % RX_RING_SIZE; - received++ ; - - } - - /* store current ring pointer state */ - tp->cur_rx = cur_rx; - - /* Refill the Rx ring buffers if they are needed */ - refill_rx_ring(dev); - - /* no packets on ring; but new ones can arrive since we last - checked */ - status = read_interrupt_status_reg(); - if (rx status is not set) { - /* If something arrives in this narrow window, - an interrupt will be generated */ - goto done; - } - /* done! at least that's what it looks like ;-> - if new packets came in after our last check on status bits - they'll be caught by the while check and we go back and clear them - since we havent exceeded our quota */ - } while (rx_status_is_set); - -done: - -/************************ note note *********************************/ - dev->quota -= received; - *budget -= received; - - /* If RX ring is not full we are out of memory. */ - if (tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL) - goto oom; - - /* we are happy/done, no more packets on ring; put us back - to where we can start processing interrupts again */ - netif_rx_complete(dev); - enable_rx_and_rxnobuf_ints(); - - /* The last op happens after poll completion. Which means the following: - * 1. it can race with disabling irqs in irq handler (which are done to - * schedule polls) - * 2. it can race with dis/enabling irqs in other poll threads - * 3. if an irq raised after the beginning of the outer beginning - * loop (marked in the code above), it will be immediately - * triggered here. 
- * - * Summarizing: the logic may result in some redundant irqs both - * due to races in masking and due to too late acking of already - * processed irqs. The good news: no events are ever lost. - */ - - return 0; /* done */ - -not_done: - if (tp->cur_rx - tp->dirty_rx > RX_RING_SIZE/2 || - tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL) - refill_rx_ring(dev); - - if (!received) { - printk("received==0\n"); - received = 1; - } - dev->quota -= received; - *budget -= received; - return 1; /* not_done */ - -oom: - /* Start timer, stop polling, but do not enable rx interrupts. */ - start_poll_timer(dev); - return 0; /* we'll take it from here so tell core "done"*/ - -/************************ End note note *********************************/ -} -------------------------------------------------------------------- - -From above we note that: -0) rx_work_limit = dev->quota -1) refill_rx_ring() is in charge of clearing the bit for rxnobuff when -it does the work. -2) We have a done and not_done state. -3) instead of netif_rx() we call netif_receive_skb() to pass the skb. -4) we have a new way of handling oom condition -5) A new outer for (;;) loop has been added. This serves the purpose of -ensuring that if a new packet has come in, after we are all set and done, -and we have not exceeded our quota that we continue sending packets up. - - ------------------------------------------------------------ -Poll timer code will need to do the following: - -a) - - if (tp->cur_rx - tp->dirty_rx > RX_RING_SIZE/2 || - tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL) - refill_rx_ring(dev); - - /* If RX ring is not full we are still out of memory. - Restart the timer again. Else we re-add ourselves - to the master poll list. 
- */ - - if (tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL) - restart_timer(); - - else netif_rx_schedule(dev); /* we are back on the poll list */ - -5) dev->close() and dev->suspend() issues -========================================== -The driver writer needn't worry about this; the top net layer takes -care of it. - -6) Adding new Stats to /proc -============================= -In order to debug some of the new features, we introduce new stats -that need to be collected. -TODO: Fill this later. - -APPENDIX 1: discussion on using ethernet HW FC -============================================== -Most chips with FC only send a pause packet when they run out of Rx buffers. -Since packets are pulled off the DMA ring by a softirq in NAPI, -if the system is slow in grabbing them and we have a high input -rate (faster than the system's capacity to remove packets), then theoretically -there will only be one rx interrupt for all packets during a given packetstorm. -Under low load, we might have a single interrupt per packet. -FC should be programmed to apply in the case when the system cant pull out -packets fast enough i.e send a pause only when you run out of rx buffers. -Note FC in itself is a good solution but we have found it to not be -much of a commodity feature (both in NICs and switches) and hence falls -under the same category as using NIC based mitigation. Also, experiments -indicate that it's much harder to resolve the resource allocation -issue (aka lazy receiving that NAPI offers) and hence quantify its usefulness -proved harder. In any case, FC works even better with NAPI but is not -necessary. 
- - -APPENDIX 2: the "rotting packet" race-window avoidance scheme -============================================================= - -There are two types of associations seen here - -1) status/int which honors level triggered IRQ - -If a status bit for receive or rxnobuff is set and the corresponding -interrupt-enable bit is not on, then no interrupts will be generated. However, -as soon as the "interrupt-enable" bit is unmasked, an immediate interrupt is -generated. [assuming the status bit was not turned off]. -Generally the concept of level triggered IRQs in association with a status and -interrupt-enable CSR register set is used to avoid the race. - -If we take the example of the tulip: -"pending work" is indicated by the status bit(CSR5 in tulip). -the corresponding interrupt bit (CSR7 in tulip) might be turned off (but -the CSR5 will continue to be turned on with new packet arrivals even if -we clear it the first time) -Very important is the fact that if we turn on the interrupt bit on when -status is set that an immediate irq is triggered. - -If we cleared the rx ring and proclaimed there was "no more work -to be done" and then went on to do a few other things; then when we enable -interrupts, there is a possibility that a new packet might sneak in during -this phase. It helps to look at the pseudo code for the tulip poll -routine: - --------------------------- - do { - ACK; - while (ring_is_not_empty()) { - work-work-work - if quota is exceeded: exit, no touching irq status/mask - } - /* No packets, but new can arrive while we are doing this*/ - CSR5 := read - if (CSR5 is not set) { - /* If something arrives in this narrow window here, - * where the comments are ;-> irq will be generated */ - unmask irqs; - exit poll; - } - } while (rx_status_is_set); ------------------------- - -CSR5 bit of interest is only the rx status. -If you look at the last if statement: -you just finished grabbing all the packets from the rx ring .. 
you check if -status bit says there are more packets just in ... it says none; you then -enable rx interrupts again; if a new packet just came in during this check, -we are counting that CSR5 will be set in that small window of opportunity -and that by re-enabling interrupts, we would actually trigger an interrupt -to register the new packet for processing. - -[The above description nay be very verbose, if you have better wording -that will make this more understandable, please suggest it.] - -2) non-capable hardware - -These do not generally respect level triggered IRQs. Normally, -irqs may be lost while being masked and the only way to leave poll is to do -a double check for new input after netif_rx_complete() is invoked -and re-enable polling (after seeing this new input). - -Sample code: - ---------- - . - . -restart_poll: - while (ring_is_not_empty()) { - work-work-work - if quota is exceeded: exit, not touching irq status/mask - } - . - . - . - enable_rx_interrupts() - netif_rx_complete(dev); - if (ring_has_new_packet() && netif_rx_reschedule(dev, received)) { - disable_rx_and_rxnobufs() - goto restart_poll - } while (rx_status_is_set); ---------- - -Basically netif_rx_complete() removes us from the poll list, but because a -new packet which will never be caught due to the possibility of a race -might come in, we attempt to re-add ourselves to the poll list. - - - - -APPENDIX 3: Scheduling issues. -============================== -As seen NAPI moves processing to softirq level. Linux uses the ksoftirqd as the -general solution to schedule softirq's to run before next interrupt and by putting -them under scheduler control. Also this prevents consecutive softirq's from -monopolize the CPU. This also have the effect that the priority of ksoftirq needs -to be considered when running very CPU-intensive applications and networking to -get the proper balance of softirq/user balance. 
Increasing ksoftirq priority to 0 -(eventually more) is reported cure problems with low network performance at high -CPU load. - -Most used processes in a GIGE router: -USER PID %CPU %MEM SIZE RSS TTY STAT START TIME COMMAND -root 3 0.2 0.0 0 0 ? RWN Aug 15 602:00 (ksoftirqd_CPU0) -root 232 0.0 7.9 41400 40884 ? S Aug 15 74:12 gated - --------------------------------------------------------------------- - -relevant sites: -================== -ftp://robur.slu.se/pub/Linux/net-development/NAPI/ - - --------------------------------------------------------------------- -TODO: Write net-skeleton.c driver. -------------------------------------------------------------- - -Authors: -======== -Alexey Kuznetsov -Jamal Hadi Salim -Robert Olsson - -Acknowledgements: -================ -People who made this document better: - -Lennert Buytenhek -Andrew Morton -Manfred Spraul -Donald Becker -Jeff Garzik diff -puN Documentation/networking/netconsole.txt~git-net Documentation/networking/netconsole.txt --- a/Documentation/networking/netconsole.txt~git-net +++ a/Documentation/networking/netconsole.txt @@ -3,6 +3,10 @@ started by Ingo Molnar , Sep 9 2003 Please send bug reports to Matt Mackall +and Satyam Sharma + +Introduction: +============= This module logs kernel printk messages over UDP allowing debugging of problem where disk logging fails and serial consoles are impractical. @@ -13,6 +17,9 @@ the specified interface as soon as possi capture of early kernel panics, it does capture most of the boot process. 
+Sender and receiver configuration: +================================== + It takes a string configuration parameter "netconsole" in the following format: @@ -34,21 +41,113 @@ Examples: insmod netconsole netconsole=@/,@10.0.0.2/ +It also supports logging to multiple remote agents by specifying +parameters for the multiple agents separated by semicolons and the +complete string enclosed in "quotes", thusly: + + modprobe netconsole netconsole="@/,@10.0.0.2/;@/eth1,6892@10.0.0.3/" + Built-in netconsole starts immediately after the TCP stack is initialized and attempts to bring up the supplied dev at the supplied address. The remote host can run either 'netcat -u -l -p <port>' or syslogd. +Dynamic reconfiguration: +======================== + +Dynamic reconfigurability is a useful addition to netconsole that enables +remote logging targets to be dynamically added, removed, or have their +parameters reconfigured at runtime from a configfs-based userspace interface. +[ Note that the parameters of netconsole targets that were specified/created +from the boot/module option are not exposed via this interface, and hence +cannot be modified dynamically. ] + +To include this feature, select CONFIG_NETCONSOLE_DYNAMIC when building the +netconsole module (or kernel, if netconsole is built-in). + +Some examples follow (where configfs is mounted at the /sys/kernel/config +mountpoint). + +To add a remote logging target (target names can be arbitrary): + + cd /sys/kernel/config/netconsole/ + mkdir target1 + +Note that newly created targets have default parameter values (as mentioned +above) and are disabled by default -- they must first be enabled by writing +"1" to the "enabled" attribute (usually after setting parameters accordingly) +as described below. + +To remove a target: + + rmdir /sys/kernel/config/netconsole/othertarget/ + +The interface exposes these parameters of a netconsole target to userspace: + + enabled Is this target currently enabled?
(read-write) + dev_name Local network interface name (read-write) + local_port Source UDP port to use (read-write) + remote_port Remote agent's UDP port (read-write) + local_ip Source IP address to use (read-write) + remote_ip Remote agent's IP address (read-write) + local_mac Local interface's MAC address (read-only) + remote_mac Remote agent's MAC address (read-write) + +The "enabled" attribute is also used to control whether the parameters of +a target can be updated or not -- you can modify the parameters of only +disabled targets (i.e. if "enabled" is 0). + +To update a target's parameters: + + cat enabled # check if enabled is 1 + echo 0 > enabled # disable the target (if required) + echo eth2 > dev_name # set local interface + echo 10.0.0.4 > remote_ip # update some parameter + echo cb:a9:87:65:43:21 > remote_mac # update more parameters + echo 1 > enabled # enable target again + +You can also update the local interface dynamically. This is especially +useful if you want to use interfaces that have newly come up (and may not +have existed when netconsole was loaded / initialized). + +Miscellaneous notes: +==================== + WARNING: the default target ethernet setting uses the broadcast ethernet address to send packets, which can cause increased load on other systems on the same ethernet segment. +TIP: some LAN switches may be configured to suppress ethernet broadcasts +so it is advised to explicitly specify the remote agents' MAC addresses +from the config parameters passed to netconsole. + +TIP: to find out the MAC address of, say, 10.0.0.2, you may try using: + + ping -c 1 10.0.0.2 ; /sbin/arp -n | grep 10.0.0.2 + +TIP: in case the remote logging agent is on a different LAN subnet than +the sender, it is suggested to try specifying the MAC address of the +default gateway (you may use /sbin/route -n to find it out) as the +remote MAC address instead.
+ NOTE: the network device (eth1 in the above case) can run any kind of other network traffic, netconsole is not intrusive. Netconsole might cause slight delays in other traffic if the volume of kernel messages is high, but should have no other impact. +NOTE: if you find that the remote logging agent is not receiving or +printing all messages from the sender, it is likely that you have set +the "console_loglevel" parameter (on the sender) to only send high +priority messages to the console. You can change this at runtime using: + + dmesg -n 8 + +or by specifying "debug" on the kernel command line at boot, to send +all kernel messages to the console. A specific value for this parameter +can also be set using the "loglevel" kernel boot option. See the +dmesg(8) man page and Documentation/kernel-parameters.txt for details. + Netconsole was designed to be as instantaneous as possible, to enable the logging of even the most critical kernel bugs. It works from IRQ contexts as well, and does not enable interrupts while diff -puN Documentation/networking/netdevices.txt~git-net Documentation/networking/netdevices.txt --- a/Documentation/networking/netdevices.txt~git-net +++ a/Documentation/networking/netdevices.txt @@ -95,9 +95,13 @@ dev->set_multicast_list: Synchronization: netif_tx_lock spinlock. Context: BHs disabled -dev->poll: - Synchronization: __LINK_STATE_RX_SCHED bit in dev->state. See - dev_close code and comments in net/core/dev.c for more info. +struct napi_struct synchronization rules +======================================== +napi->poll: + Synchronization: NAPI_STATE_SCHED bit in napi->state. Device + driver's dev->close method will invoke napi_disable() on + all NAPI instances which will do a sleeping poll on the + NAPI_STATE_SCHED napi->state bit, waiting for all pending + NAPI activity to cease. Context: softirq will be called with interrupts disabled by netconsole. 
- diff -puN drivers/infiniband/ulp/ipoib/ipoib.h~git-net drivers/infiniband/ulp/ipoib/ipoib.h --- a/drivers/infiniband/ulp/ipoib/ipoib.h~git-net +++ a/drivers/infiniband/ulp/ipoib/ipoib.h @@ -228,6 +228,8 @@ struct ipoib_dev_priv { struct net_device *dev; + struct napi_struct napi; + unsigned long flags; struct mutex mcast_mutex; @@ -351,7 +353,7 @@ extern struct workqueue_struct *ipoib_wo /* functions */ -int ipoib_poll(struct net_device *dev, int *budget); +int ipoib_poll(struct napi_struct *napi, int budget); void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr); struct ipoib_ah *ipoib_create_ah(struct net_device *dev, diff -puN drivers/infiniband/ulp/ipoib/ipoib_ib.c~git-net drivers/infiniband/ulp/ipoib/ipoib_ib.c --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c~git-net +++ a/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -281,63 +281,58 @@ static void ipoib_ib_handle_tx_wc(struct wc->status, wr_id, wc->vendor_err); } -int ipoib_poll(struct net_device *dev, int *budget) +int ipoib_poll(struct napi_struct *napi, int budget) { - struct ipoib_dev_priv *priv = netdev_priv(dev); - int max = min(*budget, dev->quota); + struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, napi); + struct net_device *dev = priv->dev; int done; int t; - int empty; int n, i; done = 0; - empty = 0; - while (max) { +poll_more: + while (done < budget) { + int max = (budget - done); + t = min(IPOIB_NUM_WC, max); n = ib_poll_cq(priv->cq, t, priv->ibwc); - for (i = 0; i < n; ++i) { + for (i = 0; i < n; i++) { struct ib_wc *wc = priv->ibwc + i; if (wc->wr_id & IPOIB_CM_OP_SRQ) { ++done; - --max; ipoib_cm_handle_rx_wc(dev, wc); } else if (wc->wr_id & IPOIB_OP_RECV) { ++done; - --max; ipoib_ib_handle_rx_wc(dev, wc); } else ipoib_ib_handle_tx_wc(dev, wc); } - if (n != t) { - empty = 1; + if (n != t) break; - } } - dev->quota -= done; - *budget -= done; - - if (empty) { - netif_rx_complete(dev); + if (done < budget) { + netif_rx_complete(dev, napi); if 
(unlikely(ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)) && - netif_rx_reschedule(dev, 0)) - return 1; - - return 0; + netif_rx_reschedule(napi)) + goto poll_more; } - return 1; + return done; } void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr) { - netif_rx_schedule(dev_ptr); + struct net_device *dev = dev_ptr; + struct ipoib_dev_priv *priv = netdev_priv(dev); + + netif_rx_schedule(dev, &priv->napi); } static inline int post_send(struct ipoib_dev_priv *priv, @@ -585,7 +580,6 @@ int ipoib_ib_dev_stop(struct net_device int i; clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); - netif_poll_disable(dev); ipoib_cm_dev_stop(dev); @@ -668,7 +662,6 @@ timeout: msleep(1); } - netif_poll_enable(dev); ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP); return 0; diff -puN drivers/infiniband/ulp/ipoib/ipoib_main.c~git-net drivers/infiniband/ulp/ipoib/ipoib_main.c --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c~git-net +++ a/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -98,16 +98,20 @@ int ipoib_open(struct net_device *dev) ipoib_dbg(priv, "bringing up interface\n"); + napi_enable(&priv->napi); set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); if (ipoib_pkey_dev_delay_open(dev)) return 0; - if (ipoib_ib_dev_open(dev)) + if (ipoib_ib_dev_open(dev)) { + napi_disable(&priv->napi); return -EINVAL; + } if (ipoib_ib_dev_up(dev)) { ipoib_ib_dev_stop(dev, 1); + napi_disable(&priv->napi); return -EINVAL; } @@ -140,6 +144,7 @@ static int ipoib_stop(struct net_device ipoib_dbg(priv, "stopping interface\n"); clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); + napi_disable(&priv->napi); netif_stop_queue(dev); @@ -948,8 +953,8 @@ static void ipoib_setup(struct net_devic dev->hard_header = ipoib_hard_header; dev->set_multicast_list = ipoib_set_mcast_list; dev->neigh_setup = ipoib_neigh_setup_dev; - dev->poll = ipoib_poll; - dev->weight = 100; + + netif_napi_add(dev, &priv->napi, ipoib_poll, 100); dev->watchdog_timeo = HZ; diff -puN drivers/net/8139cp.c~git-net 
drivers/net/8139cp.c --- a/drivers/net/8139cp.c~git-net +++ a/drivers/net/8139cp.c @@ -334,6 +334,8 @@ struct cp_private { spinlock_t lock; u32 msg_enable; + struct napi_struct napi; + struct pci_dev *pdev; u32 rx_config; u16 cpcmd; @@ -501,12 +503,12 @@ static inline unsigned int cp_rx_csum_ok return 0; } -static int cp_rx_poll (struct net_device *dev, int *budget) +static int cp_rx_poll(struct napi_struct *napi, int budget) { - struct cp_private *cp = netdev_priv(dev); - unsigned rx_tail = cp->rx_tail; - unsigned rx_work = dev->quota; - unsigned rx; + struct cp_private *cp = container_of(napi, struct cp_private, napi); + struct net_device *dev = cp->dev; + unsigned int rx_tail = cp->rx_tail; + int rx; rx_status_loop: rx = 0; @@ -588,33 +590,28 @@ rx_next: desc->opts1 = cpu_to_le32(DescOwn | cp->rx_buf_sz); rx_tail = NEXT_RX(rx_tail); - if (!rx_work--) + if (rx >= budget) break; } cp->rx_tail = rx_tail; - dev->quota -= rx; - *budget -= rx; - /* if we did not reach work limit, then we're done with * this round of polling */ - if (rx_work) { + if (rx < budget) { unsigned long flags; if (cpr16(IntrStatus) & cp_rx_intr_mask) goto rx_status_loop; - local_irq_save(flags); + spin_lock_irqsave(&cp->lock, flags); cpw16_f(IntrMask, cp_intr_mask); - __netif_rx_complete(dev); - local_irq_restore(flags); - - return 0; /* done */ + __netif_rx_complete(dev, napi); + spin_unlock_irqrestore(&cp->lock, flags); } - return 1; /* not done */ + return rx; } static irqreturn_t cp_interrupt (int irq, void *dev_instance) @@ -647,9 +644,9 @@ static irqreturn_t cp_interrupt (int irq } if (status & (RxOK | RxErr | RxEmpty | RxFIFOOvr)) - if (netif_rx_schedule_prep(dev)) { + if (netif_rx_schedule_prep(dev, &cp->napi)) { cpw16_f(IntrMask, cp_norx_intr_mask); - __netif_rx_schedule(dev); + __netif_rx_schedule(dev, &cp->napi); } if (status & (TxOK | TxErr | TxEmpty | SWInt)) @@ -1177,6 +1174,8 @@ static int cp_open (struct net_device *d if (rc) return rc; + napi_enable(&cp->napi); + 
cp_init_hw(cp); rc = request_irq(dev->irq, cp_interrupt, IRQF_SHARED, dev->name, dev); @@ -1190,6 +1189,7 @@ static int cp_open (struct net_device *d return 0; err_out_hw: + napi_disable(&cp->napi); cp_stop_hw(cp); cp_free_rings(cp); return rc; @@ -1200,6 +1200,8 @@ static int cp_close (struct net_device * struct cp_private *cp = netdev_priv(dev); unsigned long flags; + napi_disable(&cp->napi); + if (netif_msg_ifdown(cp)) printk(KERN_DEBUG "%s: disabling interface\n", dev->name); @@ -1936,11 +1938,10 @@ static int cp_init_one (struct pci_dev * dev->hard_start_xmit = cp_start_xmit; dev->get_stats = cp_get_stats; dev->do_ioctl = cp_ioctl; - dev->poll = cp_rx_poll; #ifdef CONFIG_NET_POLL_CONTROLLER dev->poll_controller = cp_poll_controller; #endif - dev->weight = 16; /* arbitrary? from NAPI_HOWTO.txt. */ + netif_napi_add(dev, &cp->napi, cp_rx_poll, 16); #ifdef BROKEN dev->change_mtu = cp_change_mtu; #endif diff -puN drivers/net/8139too.c~git-net drivers/net/8139too.c --- a/drivers/net/8139too.c~git-net +++ a/drivers/net/8139too.c @@ -572,6 +572,8 @@ struct rtl8139_private { int drv_flags; struct pci_dev *pci_dev; u32 msg_enable; + struct napi_struct napi; + struct net_device *dev; struct net_device_stats stats; unsigned char *rx_ring; @@ -632,10 +634,10 @@ static void rtl8139_tx_timeout (struct n static void rtl8139_init_ring (struct net_device *dev); static int rtl8139_start_xmit (struct sk_buff *skb, struct net_device *dev); -static int rtl8139_poll(struct net_device *dev, int *budget); #ifdef CONFIG_NET_POLL_CONTROLLER static void rtl8139_poll_controller(struct net_device *dev); #endif +static int rtl8139_poll(struct napi_struct *napi, int budget); static irqreturn_t rtl8139_interrupt (int irq, void *dev_instance); static int rtl8139_close (struct net_device *dev); static int netdev_ioctl (struct net_device *dev, struct ifreq *rq, int cmd); @@ -957,6 +959,7 @@ static int __devinit rtl8139_init_one (s assert (dev != NULL); tp = netdev_priv(dev); + tp->dev = dev; 
ioaddr = tp->mmio_addr; assert (ioaddr != NULL); @@ -970,8 +973,7 @@ static int __devinit rtl8139_init_one (s /* The Rtl8139-specific entries in the device structure. */ dev->open = rtl8139_open; dev->hard_start_xmit = rtl8139_start_xmit; - dev->poll = rtl8139_poll; - dev->weight = 64; + netif_napi_add(dev, &tp->napi, rtl8139_poll, 64); dev->stop = rtl8139_close; dev->get_stats = rtl8139_get_stats; dev->set_multicast_list = rtl8139_set_rx_mode; @@ -1326,6 +1328,8 @@ static int rtl8139_open (struct net_devi } + napi_enable(&tp->napi); + tp->mii.full_duplex = tp->mii.force_media; tp->tx_flag = (TX_FIFO_THRESH << 11) & 0x003f0000; @@ -2097,39 +2101,32 @@ static void rtl8139_weird_interrupt (str } } -static int rtl8139_poll(struct net_device *dev, int *budget) +static int rtl8139_poll(struct napi_struct *napi, int budget) { - struct rtl8139_private *tp = netdev_priv(dev); + struct rtl8139_private *tp = container_of(napi, struct rtl8139_private, napi); + struct net_device *dev = tp->dev; void __iomem *ioaddr = tp->mmio_addr; - int orig_budget = min(*budget, dev->quota); - int done = 1; + int work_done; spin_lock(&tp->rx_lock); - if (likely(RTL_R16(IntrStatus) & RxAckBits)) { - int work_done; - - work_done = rtl8139_rx(dev, tp, orig_budget); - if (likely(work_done > 0)) { - *budget -= work_done; - dev->quota -= work_done; - done = (work_done < orig_budget); - } - } + work_done = 0; + if (likely(RTL_R16(IntrStatus) & RxAckBits)) + work_done += rtl8139_rx(dev, tp, budget); - if (done) { + if (work_done < budget) { unsigned long flags; /* * Order is important since data can get interrupted * again when we think we are done. 
*/ - local_irq_save(flags); + spin_lock_irqsave(&tp->lock, flags); RTL_W16_F(IntrMask, rtl8139_intr_mask); - __netif_rx_complete(dev); - local_irq_restore(flags); + __netif_rx_complete(dev, napi); + spin_unlock_irqrestore(&tp->lock, flags); } spin_unlock(&tp->rx_lock); - return !done; + return work_done; } /* The interrupt handler does all of the Rx thread work and cleans up @@ -2174,9 +2171,9 @@ static irqreturn_t rtl8139_interrupt (in /* Receive packets are processed by poll routine. If not running start it now. */ if (status & RxAckBits){ - if (netif_rx_schedule_prep(dev)) { + if (netif_rx_schedule_prep(dev, &tp->napi)) { RTL_W16_F (IntrMask, rtl8139_norx_intr_mask); - __netif_rx_schedule (dev); + __netif_rx_schedule(dev, &tp->napi); } } @@ -2217,7 +2214,8 @@ static int rtl8139_close (struct net_dev void __iomem *ioaddr = tp->mmio_addr; unsigned long flags; - netif_stop_queue (dev); + netif_stop_queue(dev); + napi_disable(&tp->napi); if (netif_msg_ifdown(tp)) printk(KERN_DEBUG "%s: Shutting down ethercard, status was 0x%4.4x.\n", diff -puN drivers/net/Kconfig~git-net drivers/net/Kconfig --- a/drivers/net/Kconfig~git-net +++ a/drivers/net/Kconfig @@ -135,6 +135,12 @@ config TUN If you don't know what to use this for, you don't need it. +config VETH + tristate "Virtual ethernet device" + ---help--- + The device is an ethernet tunnel. Devices are created in pairs. When + one end receives the packet it appears on its pair and vice versa. + config NET_SB1000 tristate "General Instruments Surfboard 1000" depends on PNP @@ -2417,6 +2423,7 @@ config CHELSIO_T3 config EHEA tristate "eHEA Ethernet support" depends on IBMEBUS + select INET_LRO ---help--- This driver supports the IBM pSeries eHEA ethernet adapter. @@ -3029,6 +3036,16 @@ config NETCONSOLE If you want to log kernel messages over the network, enable this. See for details. 
+config NETCONSOLE_DYNAMIC + bool "Dynamic reconfiguration of logging targets (EXPERIMENTAL)" + depends on NETCONSOLE && SYSFS && EXPERIMENTAL + select CONFIGFS_FS + help + This option enables the ability to dynamically reconfigure target + parameters (interface, IP addresses, port numbers, MAC addresses) + at runtime through a userspace interface exported using configfs. + See for details. + config NETPOLL def_bool NETCONSOLE diff -puN drivers/net/Makefile~git-net drivers/net/Makefile --- a/drivers/net/Makefile~git-net +++ a/drivers/net/Makefile @@ -194,6 +194,7 @@ obj-$(CONFIG_MACSONIC) += macsonic.o obj-$(CONFIG_MACMACE) += macmace.o obj-$(CONFIG_MAC89x0) += mac89x0.o obj-$(CONFIG_TUN) += tun.o +obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_NET_NETX) += netx-eth.o obj-$(CONFIG_DL2K) += dl2k.o obj-$(CONFIG_R8169) += r8169.o diff -puN drivers/net/amd8111e.c~git-net drivers/net/amd8111e.c --- a/drivers/net/amd8111e.c~git-net +++ a/drivers/net/amd8111e.c @@ -723,9 +723,10 @@ static int amd8111e_tx(struct net_device #ifdef CONFIG_AMD8111E_NAPI /* This function handles the driver receive operation in polling mode */ -static int amd8111e_rx_poll(struct net_device *dev, int * budget) +static int amd8111e_rx_poll(struct napi_struct *napi, int budget) { - struct amd8111e_priv *lp = netdev_priv(dev); + struct amd8111e_priv *lp = container_of(napi, struct amd8111e_priv, napi); + struct net_device *dev = lp->amd8111e_net_dev; int rx_index = lp->rx_idx & RX_RING_DR_MOD_MASK; void __iomem *mmio = lp->mmio; struct sk_buff *skb,*new_skb; @@ -737,7 +738,7 @@ static int amd8111e_rx_poll(struct net_d #if AMD8111E_VLAN_TAG_USED short vtag; #endif - int rx_pkt_limit = dev->quota; + int rx_pkt_limit = budget; unsigned long flags; do{ @@ -838,21 +839,14 @@ static int amd8111e_rx_poll(struct net_d } while(intr0 & RINT0); /* Receive descriptor is empty now */ - dev->quota -= num_rx_pkt; - *budget -= num_rx_pkt; - spin_lock_irqsave(&lp->lock, flags); - netif_rx_complete(dev); + 
__netif_rx_complete(dev, napi); writel(VAL0|RINTEN0, mmio + INTEN0); writel(VAL2 | RDMD0, mmio + CMD0); spin_unlock_irqrestore(&lp->lock, flags); - return 0; rx_not_empty: - /* Do not call a netif_rx_complete */ - dev->quota -= num_rx_pkt; - *budget -= num_rx_pkt; - return 1; + return num_rx_pkt; } #else @@ -1287,11 +1281,11 @@ static irqreturn_t amd8111e_interrupt(in /* Check if Receive Interrupt has occurred. */ #ifdef CONFIG_AMD8111E_NAPI if(intr0 & RINT0){ - if(netif_rx_schedule_prep(dev)){ + if(netif_rx_schedule_prep(dev, &lp->napi)){ /* Disable receive interupts */ writel(RINTEN0, mmio + INTEN0); /* Schedule a polling routine */ - __netif_rx_schedule(dev); + __netif_rx_schedule(dev, &lp->napi); } else if (intren0 & RINTEN0) { printk("************Driver bug! \ @@ -1345,6 +1339,8 @@ static int amd8111e_close(struct net_dev struct amd8111e_priv *lp = netdev_priv(dev); netif_stop_queue(dev); + napi_disable(&lp->napi); + spin_lock_irq(&lp->lock); amd8111e_disable_interrupt(lp); @@ -1375,12 +1371,15 @@ static int amd8111e_open(struct net_devi dev->name, dev)) return -EAGAIN; + napi_enable(&lp->napi); + spin_lock_irq(&lp->lock); amd8111e_init_hw_default(lp); if(amd8111e_restart(dev)){ spin_unlock_irq(&lp->lock); + napi_disable(&lp->napi); if (dev->irq) free_irq(dev->irq, dev); return -ENOMEM; @@ -2031,8 +2030,7 @@ static int __devinit amd8111e_probe_one( dev->tx_timeout = amd8111e_tx_timeout; dev->watchdog_timeo = AMD8111E_TX_TIMEOUT; #ifdef CONFIG_AMD8111E_NAPI - dev->poll = amd8111e_rx_poll; - dev->weight = 32; + netif_napi_add(dev, &lp->napi, amd8111e_rx_poll, 32); #endif #ifdef CONFIG_NET_POLL_CONTROLLER dev->poll_controller = amd8111e_poll; diff -puN drivers/net/amd8111e.h~git-net drivers/net/amd8111e.h --- a/drivers/net/amd8111e.h~git-net +++ a/drivers/net/amd8111e.h @@ -763,6 +763,8 @@ struct amd8111e_priv{ /* Reg memory mapped address */ void __iomem *mmio; + struct napi_struct napi; + spinlock_t lock; /* Guard lock */ unsigned long rx_idx, tx_idx; /* The 
next free ring entry */ unsigned long tx_complete_idx; diff -puN drivers/net/arm/ep93xx_eth.c~git-net drivers/net/arm/ep93xx_eth.c --- a/drivers/net/arm/ep93xx_eth.c~git-net +++ a/drivers/net/arm/ep93xx_eth.c @@ -169,6 +169,9 @@ struct ep93xx_priv spinlock_t tx_pending_lock; unsigned int tx_pending; + struct net_device *dev; + struct napi_struct napi; + struct net_device_stats stats; struct mii_if_info mii; @@ -190,15 +193,11 @@ static struct net_device_stats *ep93xx_g return &(ep->stats); } -static int ep93xx_rx(struct net_device *dev, int *budget) +static int ep93xx_rx(struct net_device *dev, int processed, int budget) { struct ep93xx_priv *ep = netdev_priv(dev); - int rx_done; - int processed; - rx_done = 0; - processed = 0; - while (*budget > 0) { + while (processed < budget) { int entry; struct ep93xx_rstat *rstat; u32 rstat0; @@ -211,10 +210,8 @@ static int ep93xx_rx(struct net_device * rstat0 = rstat->rstat0; rstat1 = rstat->rstat1; - if (!(rstat0 & RSTAT0_RFP) || !(rstat1 & RSTAT1_RFP)) { - rx_done = 1; + if (!(rstat0 & RSTAT0_RFP) || !(rstat1 & RSTAT1_RFP)) break; - } rstat->rstat0 = 0; rstat->rstat1 = 0; @@ -275,8 +272,6 @@ static int ep93xx_rx(struct net_device * err: ep->rx_pointer = (entry + 1) & (RX_QUEUE_ENTRIES - 1); processed++; - dev->quota--; - (*budget)--; } if (processed) { @@ -284,7 +279,7 @@ err: wrw(ep, REG_RXSTSENQ, processed); } - return !rx_done; + return processed; } static int ep93xx_have_more_rx(struct ep93xx_priv *ep) @@ -293,36 +288,32 @@ static int ep93xx_have_more_rx(struct ep return !!((rstat->rstat0 & RSTAT0_RFP) && (rstat->rstat1 & RSTAT1_RFP)); } -static int ep93xx_poll(struct net_device *dev, int *budget) +static int ep93xx_poll(struct napi_struct *napi, int budget) { - struct ep93xx_priv *ep = netdev_priv(dev); - - /* - * @@@ Have to stop polling if device is downed while we - * are polling. 
- */ + struct ep93xx_priv *ep = container_of(napi, struct ep93xx_priv, napi); + struct net_device *dev = ep->dev; + int rx = 0; poll_some_more: - if (ep93xx_rx(dev, budget)) - return 1; - - netif_rx_complete(dev); - - spin_lock_irq(&ep->rx_lock); - wrl(ep, REG_INTEN, REG_INTEN_TX | REG_INTEN_RX); - if (ep93xx_have_more_rx(ep)) { - wrl(ep, REG_INTEN, REG_INTEN_TX); - wrl(ep, REG_INTSTSP, REG_INTSTS_RX); + rx = ep93xx_rx(dev, rx, budget); + if (rx < budget) { + int more = 0; + + spin_lock_irq(&ep->rx_lock); + __netif_rx_complete(dev, napi); + wrl(ep, REG_INTEN, REG_INTEN_TX | REG_INTEN_RX); + if (ep93xx_have_more_rx(ep)) { + wrl(ep, REG_INTEN, REG_INTEN_TX); + wrl(ep, REG_INTSTSP, REG_INTSTS_RX); + more = 1; + } spin_unlock_irq(&ep->rx_lock); - if (netif_rx_reschedule(dev, 0)) + if (more && netif_rx_reschedule(napi)) goto poll_some_more; - - return 0; } - spin_unlock_irq(&ep->rx_lock); - return 0; + return rx; } static int ep93xx_xmit(struct sk_buff *skb, struct net_device *dev) @@ -426,9 +417,9 @@ static irqreturn_t ep93xx_irq(int irq, v if (status & REG_INTSTS_RX) { spin_lock(&ep->rx_lock); - if (likely(__netif_rx_schedule_prep(dev))) { + if (likely(__netif_rx_schedule_prep(dev, &ep->napi))) { wrl(ep, REG_INTEN, REG_INTEN_TX); - __netif_rx_schedule(dev); + __netif_rx_schedule(dev, &ep->napi); } spin_unlock(&ep->rx_lock); } @@ -648,7 +639,10 @@ static int ep93xx_open(struct net_device dev->dev_addr[4], dev->dev_addr[5]); } + napi_enable(&ep->napi); + if (ep93xx_start_hw(dev)) { + napi_disable(&ep->napi); ep93xx_free_buffers(ep); return -EIO; } @@ -662,6 +656,7 @@ static int ep93xx_open(struct net_device err = request_irq(ep->irq, ep93xx_irq, IRQF_SHARED, dev->name, dev); if (err) { + napi_disable(&ep->napi); ep93xx_stop_hw(dev); ep93xx_free_buffers(ep); return err; @@ -678,6 +673,7 @@ static int ep93xx_close(struct net_devic { struct ep93xx_priv *ep = netdev_priv(dev); + napi_disable(&ep->napi); netif_stop_queue(dev); wrl(ep, REG_GIINTMSK, 0); @@ -788,14 +784,12 @@ 
struct net_device *ep93xx_dev_alloc(stru dev->get_stats = ep93xx_get_stats; dev->ethtool_ops = &ep93xx_ethtool_ops; - dev->poll = ep93xx_poll; dev->hard_start_xmit = ep93xx_xmit; dev->open = ep93xx_open; dev->stop = ep93xx_close; dev->do_ioctl = ep93xx_ioctl; dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; - dev->weight = 64; return dev; } @@ -847,6 +841,8 @@ static int ep93xx_eth_probe(struct platf goto err_out; } ep = netdev_priv(dev); + ep->dev = dev; + netif_napi_add(dev, &ep->napi, ep93xx_poll, 64); platform_set_drvdata(pdev, dev); diff -puN drivers/net/b44.c~git-net drivers/net/b44.c --- a/drivers/net/b44.c~git-net +++ a/drivers/net/b44.c @@ -848,10 +848,11 @@ static int b44_rx(struct b44 *bp, int bu return received; } -static int b44_poll(struct net_device *netdev, int *budget) +static int b44_poll(struct napi_struct *napi, int budget) { - struct b44 *bp = netdev_priv(netdev); - int done; + struct b44 *bp = container_of(napi, struct b44, napi); + struct net_device *netdev = bp->dev; + int work_done; spin_lock_irq(&bp->lock); @@ -862,22 +863,9 @@ static int b44_poll(struct net_device *n } spin_unlock_irq(&bp->lock); - done = 1; - if (bp->istat & ISTAT_RX) { - int orig_budget = *budget; - int work_done; - - if (orig_budget > netdev->quota) - orig_budget = netdev->quota; - - work_done = b44_rx(bp, orig_budget); - - *budget -= work_done; - netdev->quota -= work_done; - - if (work_done >= orig_budget) - done = 0; - } + work_done = 0; + if (bp->istat & ISTAT_RX) + work_done += b44_rx(bp, budget); if (bp->istat & ISTAT_ERRORS) { unsigned long flags; @@ -888,15 +876,15 @@ static int b44_poll(struct net_device *n b44_init_hw(bp, B44_FULL_RESET_SKIP_PHY); netif_wake_queue(bp->dev); spin_unlock_irqrestore(&bp->lock, flags); - done = 1; + work_done = 0; } - if (done) { - netif_rx_complete(netdev); + if (work_done < budget) { + netif_rx_complete(netdev, napi); b44_enable_ints(bp); } - return (done ? 
0 : 1); + return work_done; } static irqreturn_t b44_interrupt(int irq, void *dev_id) @@ -924,13 +912,13 @@ static irqreturn_t b44_interrupt(int irq goto irq_ack; } - if (netif_rx_schedule_prep(dev)) { + if (netif_rx_schedule_prep(dev, &bp->napi)) { /* NOTE: These writes are posted by the readback of * the ISTAT register below. */ bp->istat = istat; __b44_disable_ints(bp); - __netif_rx_schedule(dev); + __netif_rx_schedule(dev, &bp->napi); } else { printk(KERN_ERR PFX "%s: Error, poll already scheduled\n", dev->name); @@ -1420,6 +1408,8 @@ static int b44_open(struct net_device *d if (err) goto out; + napi_enable(&bp->napi); + b44_init_rings(bp); b44_init_hw(bp, B44_FULL_RESET); @@ -1427,6 +1417,7 @@ static int b44_open(struct net_device *d err = request_irq(dev->irq, b44_interrupt, IRQF_SHARED, dev->name, dev); if (unlikely(err < 0)) { + napi_disable(&bp->napi); b44_chip_reset(bp); b44_free_rings(bp); b44_free_consistent(bp); @@ -1609,7 +1600,7 @@ static int b44_close(struct net_device * netif_stop_queue(dev); - netif_poll_disable(dev); + napi_disable(&bp->napi); del_timer_sync(&bp->timer); @@ -1626,8 +1617,6 @@ static int b44_close(struct net_device * free_irq(dev->irq, dev); - netif_poll_enable(dev); - if (bp->flags & B44_FLAG_WOL_ENABLE) { b44_init_hw(bp, B44_PARTIAL_RESET); b44_setup_wol(bp); @@ -2194,8 +2183,7 @@ static int __devinit b44_init_one(struct dev->set_mac_address = b44_set_mac_addr; dev->do_ioctl = b44_ioctl; dev->tx_timeout = b44_tx_timeout; - dev->poll = b44_poll; - dev->weight = 64; + netif_napi_add(dev, &bp->napi, b44_poll, 64); dev->watchdog_timeo = B44_TX_TIMEOUT; #ifdef CONFIG_NET_POLL_CONTROLLER dev->poll_controller = b44_poll_controller; diff -puN drivers/net/b44.h~git-net drivers/net/b44.h --- a/drivers/net/b44.h~git-net +++ a/drivers/net/b44.h @@ -423,6 +423,8 @@ struct b44 { struct ring_info *rx_buffers; struct ring_info *tx_buffers; + struct napi_struct napi; + u32 dma_offset; u32 flags; #define B44_FLAG_B0_ANDLATER 0x00000001 diff -puN 
drivers/net/bnx2.c~git-net drivers/net/bnx2.c --- a/drivers/net/bnx2.c~git-net +++ a/drivers/net/bnx2.c @@ -428,7 +428,7 @@ bnx2_netif_stop(struct bnx2 *bp) { bnx2_disable_int_sync(bp); if (netif_running(bp->dev)) { - netif_poll_disable(bp->dev); + napi_disable(&bp->napi); netif_tx_disable(bp->dev); bp->dev->trans_start = jiffies; /* prevent tx timeout */ } @@ -440,7 +440,7 @@ bnx2_netif_start(struct bnx2 *bp) if (atomic_dec_and_test(&bp->intr_sem)) { if (netif_running(bp->dev)) { netif_wake_queue(bp->dev); - netif_poll_enable(bp->dev); + napi_enable(&bp->napi); bnx2_enable_int(bp); } } @@ -2551,7 +2551,7 @@ bnx2_msi(int irq, void *dev_instance) if (unlikely(atomic_read(&bp->intr_sem) != 0)) return IRQ_HANDLED; - netif_rx_schedule(dev); + netif_rx_schedule(dev, &bp->napi); return IRQ_HANDLED; } @@ -2568,7 +2568,7 @@ bnx2_msi_1shot(int irq, void *dev_instan if (unlikely(atomic_read(&bp->intr_sem) != 0)) return IRQ_HANDLED; - netif_rx_schedule(dev); + netif_rx_schedule(dev, &bp->napi); return IRQ_HANDLED; } @@ -2604,9 +2604,9 @@ bnx2_interrupt(int irq, void *dev_instan if (unlikely(atomic_read(&bp->intr_sem) != 0)) return IRQ_HANDLED; - if (netif_rx_schedule_prep(dev)) { + if (netif_rx_schedule_prep(dev, &bp->napi)) { bp->last_status_idx = sblk->status_idx; - __netif_rx_schedule(dev); + __netif_rx_schedule(dev, &bp->napi); } return IRQ_HANDLED; @@ -2632,12 +2632,14 @@ bnx2_has_work(struct bnx2 *bp) } static int -bnx2_poll(struct net_device *dev, int *budget) +bnx2_poll(struct napi_struct *napi, int budget) { - struct bnx2 *bp = netdev_priv(dev); + struct bnx2 *bp = container_of(napi, struct bnx2, napi); + struct net_device *dev = bp->dev; struct status_block *sblk = bp->status_blk; u32 status_attn_bits = sblk->status_attn_bits; u32 status_attn_bits_ack = sblk->status_attn_bits_ack; + int work_done = 0; if ((status_attn_bits & STATUS_ATTN_EVENTS) != (status_attn_bits_ack & STATUS_ATTN_EVENTS)) { @@ -2655,23 +2657,14 @@ bnx2_poll(struct net_device *dev, int *b if 
(bp->status_blk->status_tx_quick_consumer_index0 != bp->hw_tx_cons) bnx2_tx_int(bp); - if (bp->status_blk->status_rx_quick_consumer_index0 != bp->hw_rx_cons) { - int orig_budget = *budget; - int work_done; - - if (orig_budget > dev->quota) - orig_budget = dev->quota; - - work_done = bnx2_rx_int(bp, orig_budget); - *budget -= work_done; - dev->quota -= work_done; - } + if (bp->status_blk->status_rx_quick_consumer_index0 != bp->hw_rx_cons) + work_done = bnx2_rx_int(bp, budget); bp->last_status_idx = bp->status_blk->status_idx; rmb(); if (!bnx2_has_work(bp)) { - netif_rx_complete(dev); + netif_rx_complete(dev, napi); if (likely(bp->flags & USING_MSI_FLAG)) { REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD, BNX2_PCICFG_INT_ACK_CMD_INDEX_VALID | @@ -2686,10 +2679,9 @@ bnx2_poll(struct net_device *dev, int *b REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD, BNX2_PCICFG_INT_ACK_CMD_INDEX_VALID | bp->last_status_idx); - return 0; } - return 1; + return work_done; } /* Called with rtnl_lock from vlan functions and also netif_tx_lock @@ -5039,6 +5031,8 @@ bnx2_open(struct net_device *dev) if (rc) return rc; + napi_enable(&bp->napi); + if ((bp->flags & MSI_CAP_FLAG) && !disable_msi) { if (pci_enable_msi(bp->pdev) == 0) { bp->flags |= USING_MSI_FLAG; @@ -5049,6 +5043,7 @@ bnx2_open(struct net_device *dev) rc = bnx2_request_irq(bp); if (rc) { + napi_disable(&bp->napi); bnx2_free_mem(bp); return rc; } @@ -5056,6 +5051,7 @@ bnx2_open(struct net_device *dev) rc = bnx2_init_nic(bp); if (rc) { + napi_disable(&bp->napi); bnx2_free_irq(bp); bnx2_free_skbs(bp); bnx2_free_mem(bp); @@ -5088,6 +5084,7 @@ bnx2_open(struct net_device *dev) rc = bnx2_request_irq(bp); if (rc) { + napi_disable(&bp->napi); bnx2_free_skbs(bp); bnx2_free_mem(bp); del_timer_sync(&bp->timer); @@ -5301,6 +5298,7 @@ bnx2_close(struct net_device *dev) while (bp->in_reset_task) msleep(1); + /* This does napi_disable() for us. 
*/ bnx2_netif_stop(bp); del_timer_sync(&bp->timer); if (bp->flags & NO_WOL_FLAG) @@ -6857,11 +6855,10 @@ bnx2_init_one(struct pci_dev *pdev, cons #ifdef BCM_VLAN dev->vlan_rx_register = bnx2_vlan_rx_register; #endif - dev->poll = bnx2_poll; dev->ethtool_ops = &bnx2_ethtool_ops; - dev->weight = 64; bp = netdev_priv(dev); + netif_napi_add(dev, &bp->napi, bnx2_poll, 64); #if defined(HAVE_POLL_CONTROLLER) || defined(CONFIG_NET_POLL_CONTROLLER) dev->poll_controller = poll_bnx2; diff -puN drivers/net/bnx2.h~git-net drivers/net/bnx2.h --- a/drivers/net/bnx2.h~git-net +++ a/drivers/net/bnx2.h @@ -6473,6 +6473,8 @@ struct bnx2 { struct net_device *dev; struct pci_dev *pdev; + struct napi_struct napi; + atomic_t intr_sem; struct status_block *status_blk; diff -puN drivers/net/cassini.c~git-net drivers/net/cassini.c --- a/drivers/net/cassini.c~git-net +++ a/drivers/net/cassini.c @@ -2485,7 +2485,7 @@ static irqreturn_t cas_interruptN(int ir if (status & INTR_RX_DONE_ALT) { /* handle rx separately */ #ifdef USE_NAPI cas_mask_intr(cp); - netif_rx_schedule(dev); + netif_rx_schedule(dev, &cp->napi); #else cas_rx_ringN(cp, ring, 0); #endif @@ -2536,7 +2536,7 @@ static irqreturn_t cas_interrupt1(int ir if (status & INTR_RX_DONE_ALT) { /* handle rx separately */ #ifdef USE_NAPI cas_mask_intr(cp); - netif_rx_schedule(dev); + netif_rx_schedule(dev, &cp->napi); #else cas_rx_ringN(cp, 1, 0); #endif @@ -2592,7 +2592,7 @@ static irqreturn_t cas_interrupt(int irq if (status & INTR_RX_DONE) { #ifdef USE_NAPI cas_mask_intr(cp); - netif_rx_schedule(dev); + netif_rx_schedule(dev, &cp->napi); #else cas_rx_ringN(cp, 0, 0); #endif @@ -2607,9 +2607,10 @@ static irqreturn_t cas_interrupt(int irq #ifdef USE_NAPI -static int cas_poll(struct net_device *dev, int *budget) +static int cas_poll(struct napi_struct *napi, int budget) { - struct cas *cp = netdev_priv(dev); + struct cas *cp = container_of(napi, struct cas, napi); + struct net_device *dev = cp->dev; int i, enable_intr, todo, credits; u32 
status = readl(cp->regs + REG_INTR_STATUS); unsigned long flags; @@ -2620,20 +2621,18 @@ static int cas_poll(struct net_device *d /* NAPI rx packets. we spread the credits across all of the * rxc rings - */ - todo = min(*budget, dev->quota); - - /* to make sure we're fair with the work we loop through each + * + * to make sure we're fair with the work we loop through each * ring N_RX_COMP_RING times with a request of - * todo / N_RX_COMP_RINGS + * budget / N_RX_COMP_RINGS */ enable_intr = 1; credits = 0; for (i = 0; i < N_RX_COMP_RINGS; i++) { int j; for (j = 0; j < N_RX_COMP_RINGS; j++) { - credits += cas_rx_ringN(cp, j, todo / N_RX_COMP_RINGS); - if (credits >= todo) { + credits += cas_rx_ringN(cp, j, budget / N_RX_COMP_RINGS); + if (credits >= budget) { enable_intr = 0; goto rx_comp; } @@ -2641,9 +2640,6 @@ static int cas_poll(struct net_device *d } rx_comp: - *budget -= credits; - dev->quota -= credits; - /* final rx completion */ spin_lock_irqsave(&cp->lock, flags); if (status) @@ -2674,11 +2670,10 @@ rx_comp: #endif spin_unlock_irqrestore(&cp->lock, flags); if (enable_intr) { - netif_rx_complete(dev); + netif_rx_complete(dev, napi); cas_unmask_intr(cp); - return 0; } - return 1; + return credits; } #endif @@ -4351,6 +4346,9 @@ static int cas_open(struct net_device *d goto err_spare; } +#ifdef USE_NAPI + napi_enable(&cp->napi); +#endif /* init hw */ cas_lock_all_save(cp, flags); cas_clean_rings(cp); @@ -4376,6 +4374,9 @@ static int cas_close(struct net_device * unsigned long flags; struct cas *cp = netdev_priv(dev); +#ifdef USE_NAPI + napi_disable(&cp->napi); +#endif /* Make sure we don't get distracted by suspend/resume */ mutex_lock(&cp->pm_mutex); @@ -5062,8 +5063,7 @@ static int __devinit cas_init_one(struct dev->watchdog_timeo = CAS_TX_TIMEOUT; dev->change_mtu = cas_change_mtu; #ifdef USE_NAPI - dev->poll = cas_poll; - dev->weight = 64; + netif_napi_add(dev, &cp->napi, cas_poll, 64); #endif #ifdef CONFIG_NET_POLL_CONTROLLER dev->poll_controller =
cas_netpoll; diff -puN drivers/net/cassini.h~git-net drivers/net/cassini.h --- a/drivers/net/cassini.h~git-net +++ a/drivers/net/cassini.h @@ -4280,6 +4280,8 @@ struct cas { int rx_cur[N_RX_COMP_RINGS], rx_new[N_RX_COMP_RINGS]; int rx_last[N_RX_DESC_RINGS]; + struct napi_struct napi; + /* Set when chip is actually in operational state * (ie. not power managed) */ int hw_running; diff -puN drivers/net/chelsio/common.h~git-net drivers/net/chelsio/common.h --- a/drivers/net/chelsio/common.h~git-net +++ a/drivers/net/chelsio/common.h @@ -278,6 +278,7 @@ struct adapter { struct peespi *espi; struct petp *tp; + struct napi_struct napi; struct port_info port[MAX_NPORTS]; struct delayed_work stats_update_task; struct timer_list stats_update_timer; diff -puN drivers/net/chelsio/cxgb2.c~git-net drivers/net/chelsio/cxgb2.c --- a/drivers/net/chelsio/cxgb2.c~git-net +++ a/drivers/net/chelsio/cxgb2.c @@ -255,8 +255,11 @@ static int cxgb_open(struct net_device * struct adapter *adapter = dev->priv; int other_ports = adapter->open_device_map & PORT_MASK; - if (!adapter->open_device_map && (err = cxgb_up(adapter)) < 0) + napi_enable(&adapter->napi); + if (!adapter->open_device_map && (err = cxgb_up(adapter)) < 0) { + napi_disable(&adapter->napi); return err; + } __set_bit(dev->if_port, &adapter->open_device_map); link_start(&adapter->port[dev->if_port]); @@ -274,6 +277,7 @@ static int cxgb_close(struct net_device struct cmac *mac = p->mac; netif_stop_queue(dev); + napi_disable(&adapter->napi); mac->ops->disable(mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX); netif_carrier_off(dev); @@ -1113,8 +1117,7 @@ static int __devinit init_one(struct pci netdev->poll_controller = t1_netpoll; #endif #ifdef CONFIG_CHELSIO_T1_NAPI - netdev->weight = 64; - netdev->poll = t1_poll; + netif_napi_add(netdev, &adapter->napi, t1_poll, 64); #endif SET_ETHTOOL_OPS(netdev, &t1_ethtool_ops); diff -puN drivers/net/chelsio/sge.c~git-net drivers/net/chelsio/sge.c --- a/drivers/net/chelsio/sge.c~git-net +++ 
a/drivers/net/chelsio/sge.c @@ -1620,23 +1620,20 @@ static int process_pure_responses(struct * or protection from interrupts as data interrupts are off at this point and * other adapter interrupts do not interfere. */ -int t1_poll(struct net_device *dev, int *budget) +int t1_poll(struct napi_struct *napi, int budget) { - struct adapter *adapter = dev->priv; + struct adapter *adapter = container_of(napi, struct adapter, napi); + struct net_device *dev = adapter->port[0].dev; int work_done; - work_done = process_responses(adapter, min(*budget, dev->quota)); - *budget -= work_done; - dev->quota -= work_done; - - if (unlikely(responses_pending(adapter))) - return 1; - - netif_rx_complete(dev); - writel(adapter->sge->respQ.cidx, adapter->regs + A_SG_SLEEPING); - - return 0; + work_done = process_responses(adapter, budget); + if (likely(!responses_pending(adapter))) { + netif_rx_complete(dev, napi); + writel(adapter->sge->respQ.cidx, + adapter->regs + A_SG_SLEEPING); + } + return work_done; } /* @@ -1653,13 +1650,13 @@ irqreturn_t t1_interrupt(int irq, void * writel(F_PL_INTR_SGE_DATA, adapter->regs + A_PL_CAUSE); - if (__netif_rx_schedule_prep(dev)) { + if (napi_schedule_prep(&adapter->napi)) { if (process_pure_responses(adapter)) - __netif_rx_schedule(dev); + __netif_rx_schedule(dev, &adapter->napi); else { /* no data, no NAPI needed */ writel(sge->respQ.cidx, adapter->regs + A_SG_SLEEPING); - netif_poll_enable(dev); /* undo schedule_prep */ + napi_enable(&adapter->napi); /* undo schedule_prep */ } } return IRQ_HANDLED; diff -puN drivers/net/chelsio/sge.h~git-net drivers/net/chelsio/sge.h --- a/drivers/net/chelsio/sge.h~git-net +++ a/drivers/net/chelsio/sge.h @@ -77,7 +77,7 @@ int t1_sge_configure(struct sge *, struc int t1_sge_set_coalesce_params(struct sge *, struct sge_params *); void t1_sge_destroy(struct sge *); irqreturn_t t1_interrupt(int irq, void *cookie); -int t1_poll(struct net_device *, int *); +int t1_poll(struct napi_struct *, int); int 
t1_start_xmit(struct sk_buff *skb, struct net_device *dev); void t1_set_vlan_accel(struct adapter *adapter, int on_off); diff -puN drivers/net/cxgb3/adapter.h~git-net drivers/net/cxgb3/adapter.h --- a/drivers/net/cxgb3/adapter.h~git-net +++ a/drivers/net/cxgb3/adapter.h @@ -50,7 +50,9 @@ typedef irqreturn_t(*intr_handler_t) (in struct vlan_group; +struct adapter; struct port_info { + struct adapter *adapter; struct vlan_group *vlan_grp; const struct port_type_info *port_type; u8 port_id; @@ -170,11 +172,13 @@ enum { /* per port SGE statistics */ SGE_PSTAT_MAX /* must be last */ }; +struct adapter_napi; struct sge_qset { /* an SGE queue set */ struct sge_rspq rspq; struct sge_fl fl[SGE_RXQ_PER_SET]; struct sge_txq txq[SGE_TXQ_PER_SET]; - struct net_device *netdev; /* associated net device */ + struct adapter_napi *anapi; + struct net_device *netdev; unsigned long txq_stopped; /* which Tx queues are stopped */ struct timer_list tx_reclaim_timer; /* reclaims TX buffers */ unsigned long port_stats[SGE_PSTAT_MAX]; @@ -185,6 +189,13 @@ struct sge { spinlock_t reg_lock; /* guards non-atomic SGE registers (eg context) */ }; +struct adapter_napi { + struct napi_struct napi; + struct adapter *adapter; + int port; + int qset; +}; + struct adapter { struct t3cdev tdev; struct list_head adapter_list; @@ -219,11 +230,7 @@ struct adapter { struct delayed_work adap_check_task; struct work_struct ext_intr_handler_task; - /* - * Dummy netdevices are needed when using multiple receive queues with - * NAPI as each netdevice can service only one queue. - */ - struct net_device *dummy_netdev[SGE_QSETS - 1]; + struct adapter_napi napi[SGE_QSETS]; struct dentry *debugfs_root; @@ -251,12 +258,6 @@ static inline struct port_info *adap2pin return netdev_priv(adap->port[idx]); } -/* - * We use the spare atalk_ptr to map a net device to its SGE queue set. - * This is a macro so it can be used as l-value. 
- */ -#define dev2qset(netdev) ((netdev)->atalk_ptr) - #define OFFLOAD_DEVMAP_BIT 15 #define tdev2adap(d) container_of(d, struct adapter, tdev) @@ -282,7 +283,8 @@ int t3_mgmt_tx(struct adapter *adap, str void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p); int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports, int irq_vec_idx, const struct qset_params *p, - int ntxq, struct net_device *netdev); + int ntxq, struct adapter_napi *anapi, + struct net_device *dev); int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, unsigned char *data); irqreturn_t t3_sge_intr_msix(int irq, void *cookie); diff -puN drivers/net/cxgb3/cxgb3_main.c~git-net drivers/net/cxgb3/cxgb3_main.c --- a/drivers/net/cxgb3/cxgb3_main.c~git-net +++ a/drivers/net/cxgb3/cxgb3_main.c @@ -339,46 +339,24 @@ static void setup_rss(struct adapter *ad V_RRCPLCPUSIZE(6), cpus, rspq_map); } -/* - * If we have multiple receive queues per port serviced by NAPI we need one - * netdevice per queue as NAPI operates on netdevices. We already have one - * netdevice, namely the one associated with the interface, so we use dummy - * ones for any additional queues. Note that these netdevices exist purely - * so that NAPI has something to work with, they do not represent network - * ports and are not registered. 
- */ -static int init_dummy_netdevs(struct adapter *adap) +static int init_napi(struct adapter *adap) { - int i, j, dummy_idx = 0; - struct net_device *nd; + int i, j, napi_idx = 0; for_each_port(adap, i) { struct net_device *dev = adap->port[i]; const struct port_info *pi = netdev_priv(dev); for (j = 0; j < pi->nqsets - 1; j++) { - if (!adap->dummy_netdev[dummy_idx]) { - nd = alloc_netdev(0, "", ether_setup); - if (!nd) - goto free_all; - - nd->priv = adap; - nd->weight = 64; - set_bit(__LINK_STATE_START, &nd->state); - adap->dummy_netdev[dummy_idx] = nd; - } - strcpy(adap->dummy_netdev[dummy_idx]->name, dev->name); - dummy_idx++; + netif_napi_add(dev, &adap->napi[napi_idx].napi, + NULL, 64); + adap->napi[napi_idx].adapter = adap; + adap->napi[napi_idx].port = i; + adap->napi[napi_idx].qset = j; + napi_idx++; } } return 0; - -free_all: - while (--dummy_idx >= 0) { - free_netdev(adap->dummy_netdev[dummy_idx]); - adap->dummy_netdev[dummy_idx] = NULL; - } - return -ENOMEM; } /* @@ -389,19 +367,20 @@ free_all: static void quiesce_rx(struct adapter *adap) { int i; - struct net_device *dev; - for_each_port(adap, i) { - dev = adap->port[i]; - while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) - msleep(1); + for (i = 0; i < ARRAY_SIZE(adap->napi); i++) { + struct adapter_napi *anapi = &adap->napi[i]; + napi_disable(&anapi->napi); } +} - for (i = 0; i < ARRAY_SIZE(adap->dummy_netdev); i++) { - dev = adap->dummy_netdev[i]; - if (dev) - while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) - msleep(1); +static void enable_all_napi(struct adapter *adap) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(adap->napi); i++) { + struct adapter_napi *anapi = &adap->napi[i]; + napi_enable(&anapi->napi); } } @@ -415,7 +394,7 @@ static void quiesce_rx(struct adapter *a */ static int setup_sge_qsets(struct adapter *adap) { - int i, j, err, irq_idx = 0, qset_idx = 0, dummy_dev_idx = 0; + int i, j, err, irq_idx = 0, qset_idx = 0, anapi_idx = 0; unsigned int ntxq = SGE_TXQ_PER_SET; if 
(adap->params.rev > 0 && !(adap->flags & USING_MSI)) @@ -426,12 +405,15 @@ static int setup_sge_qsets(struct adapte const struct port_info *pi = netdev_priv(dev); for (j = 0; j < pi->nqsets; ++j, ++qset_idx) { + struct adapter_napi *anapi; + + anapi = &adap->napi[anapi_idx++]; + err = t3_sge_alloc_qset(adap, qset_idx, 1, (adap->flags & USING_MSIX) ? qset_idx + 1 : irq_idx, &adap->params.sge.qset[qset_idx], ntxq, - j == 0 ? dev : - adap-> dummy_netdev[dummy_dev_idx++]); + anapi, dev); if (err) { t3_free_sge_resources(adap); return err; @@ -482,7 +464,8 @@ static ssize_t attr_store(struct device #define CXGB3_SHOW(name, val_expr) \ static ssize_t format_##name(struct net_device *dev, char *buf) \ { \ - struct adapter *adap = dev->priv; \ + struct port_info *pi = netdev_priv(dev); \ + struct adapter *adap = pi->adapter; \ return sprintf(buf, "%u\n", val_expr); \ } \ static ssize_t show_##name(struct device *d, struct device_attribute *attr, \ @@ -493,7 +476,8 @@ static ssize_t show_##name(struct device static ssize_t set_nfilters(struct net_device *dev, unsigned int val) { - struct adapter *adap = dev->priv; + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; int min_tids = is_offload(adap) ? 
MC5_MIN_TIDS : 0; if (adap->flags & FULL_INIT_DONE) @@ -515,7 +499,8 @@ static ssize_t store_nfilters(struct dev static ssize_t set_nservers(struct net_device *dev, unsigned int val) { - struct adapter *adap = dev->priv; + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; if (adap->flags & FULL_INIT_DONE) return -EBUSY; @@ -556,9 +541,10 @@ static struct attribute_group cxgb3_attr static ssize_t tm_attr_show(struct device *d, struct device_attribute *attr, char *buf, int sched) { - ssize_t len; + struct port_info *pi = netdev_priv(to_net_dev(d)); + struct adapter *adap = pi->adapter; unsigned int v, addr, bpt, cpt; - struct adapter *adap = to_net_dev(d)->priv; + ssize_t len; addr = A_TP_TX_MOD_Q1_Q0_RATE_LIMIT - sched / 2; rtnl_lock(); @@ -581,10 +567,11 @@ static ssize_t tm_attr_show(struct devic static ssize_t tm_attr_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len, int sched) { + struct port_info *pi = netdev_priv(to_net_dev(d)); + struct adapter *adap = pi->adapter; + unsigned int val; char *endp; ssize_t ret; - unsigned int val; - struct adapter *adap = to_net_dev(d)->priv; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -763,7 +750,7 @@ static int cxgb_up(struct adapter *adap) if (err) goto out; - err = init_dummy_netdevs(adap); + err = init_napi(adap); if (err) goto out; @@ -858,7 +845,8 @@ static void schedule_chk_task(struct ada static int offload_open(struct net_device *dev) { - struct adapter *adapter = dev->priv; + struct port_info *pi = netdev_priv(dev); + struct adapter *adapter = pi->adapter; struct t3cdev *tdev = T3CDEV(dev); int adap_up = adapter->open_device_map & PORT_MASK; int err; @@ -924,13 +912,16 @@ static int offload_close(struct t3cdev * static int cxgb_open(struct net_device *dev) { - int err; - struct adapter *adapter = dev->priv; struct port_info *pi = netdev_priv(dev); + struct adapter *adapter = pi->adapter; int other_ports = adapter->open_device_map & PORT_MASK; + int 
err; - if (!adapter->open_device_map && (err = cxgb_up(adapter)) < 0) + enable_all_napi(adapter); + if (!adapter->open_device_map && (err = cxgb_up(adapter)) < 0) { + quiesce_rx(adapter); return err; + } set_bit(pi->port_id, &adapter->open_device_map); if (is_offload(adapter) && !ofld_disable) { @@ -951,17 +942,17 @@ static int cxgb_open(struct net_device * static int cxgb_close(struct net_device *dev) { - struct adapter *adapter = dev->priv; - struct port_info *p = netdev_priv(dev); + struct port_info *pi = netdev_priv(dev); + struct adapter *adapter = pi->adapter; - t3_port_intr_disable(adapter, p->port_id); + t3_port_intr_disable(adapter, pi->port_id); netif_stop_queue(dev); - p->phy.ops->power_down(&p->phy, 1); + pi->phy.ops->power_down(&pi->phy, 1); netif_carrier_off(dev); - t3_mac_disable(&p->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX); + t3_mac_disable(&pi->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX); spin_lock(&adapter->work_lock); /* sync with update task */ - clear_bit(p->port_id, &adapter->open_device_map); + clear_bit(pi->port_id, &adapter->open_device_map); spin_unlock(&adapter->work_lock); if (!(adapter->open_device_map & PORT_MASK)) @@ -976,13 +967,13 @@ static int cxgb_close(struct net_device static struct net_device_stats *cxgb_get_stats(struct net_device *dev) { - struct adapter *adapter = dev->priv; - struct port_info *p = netdev_priv(dev); - struct net_device_stats *ns = &p->netstats; + struct port_info *pi = netdev_priv(dev); + struct adapter *adapter = pi->adapter; + struct net_device_stats *ns = &pi->netstats; const struct mac_stats *pstats; spin_lock(&adapter->stats_lock); - pstats = t3_mac_update_stats(&p->mac); + pstats = t3_mac_update_stats(&pi->mac); spin_unlock(&adapter->stats_lock); ns->tx_bytes = pstats->tx_octets; @@ -1015,14 +1006,16 @@ static struct net_device_stats *cxgb_get static u32 get_msglevel(struct net_device *dev) { - struct adapter *adapter = dev->priv; + struct port_info *pi = netdev_priv(dev); + struct adapter *adapter = 
pi->adapter; return adapter->msg_enable; } static void set_msglevel(struct net_device *dev, u32 val) { - struct adapter *adapter = dev->priv; + struct port_info *pi = netdev_priv(dev); + struct adapter *adapter = pi->adapter; adapter->msg_enable = val; } @@ -1096,8 +1089,9 @@ static int get_eeprom_len(struct net_dev static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { + struct port_info *pi = netdev_priv(dev); + struct adapter *adapter = pi->adapter; u32 fw_vers = 0; - struct adapter *adapter = dev->priv; t3_get_fw_version(adapter, &fw_vers); @@ -1136,8 +1130,8 @@ static unsigned long collect_sge_port_st static void get_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { - struct adapter *adapter = dev->priv; struct port_info *pi = netdev_priv(dev); + struct adapter *adapter = pi->adapter; const struct mac_stats *s; spin_lock(&adapter->stats_lock); @@ -1205,7 +1199,8 @@ static inline void reg_block_dump(struct static void get_regs(struct net_device *dev, struct ethtool_regs *regs, void *buf) { - struct adapter *ap = dev->priv; + struct port_info *pi = netdev_priv(dev); + struct adapter *ap = pi->adapter; /* * Version scheme: @@ -1246,8 +1241,9 @@ static int restart_autoneg(struct net_de static int cxgb3_phys_id(struct net_device *dev, u32 data) { + struct port_info *pi = netdev_priv(dev); + struct adapter *adapter = pi->adapter; int i; - struct adapter *adapter = dev->priv; if (data == 0) data = 2; @@ -1408,8 +1404,8 @@ static int set_rx_csum(struct net_device static void get_sge_param(struct net_device *dev, struct ethtool_ringparam *e) { - const struct adapter *adapter = dev->priv; - const struct port_info *pi = netdev_priv(dev); + struct port_info *pi = netdev_priv(dev); + struct adapter *adapter = pi->adapter; const struct qset_params *q = &adapter->params.sge.qset[pi->first_qset]; e->rx_max_pending = MAX_RX_BUFFERS; @@ -1425,10 +1421,10 @@ static void get_sge_param(struct net_dev static int set_sge_param(struct 
net_device *dev, struct ethtool_ringparam *e) { - int i; + struct port_info *pi = netdev_priv(dev); + struct adapter *adapter = pi->adapter; struct qset_params *q; - struct adapter *adapter = dev->priv; - const struct port_info *pi = netdev_priv(dev); + int i; if (e->rx_pending > MAX_RX_BUFFERS || e->rx_jumbo_pending > MAX_RX_JUMBO_BUFFERS || @@ -1457,7 +1453,8 @@ static int set_sge_param(struct net_devi static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c) { - struct adapter *adapter = dev->priv; + struct port_info *pi = netdev_priv(dev); + struct adapter *adapter = pi->adapter; struct qset_params *qsp = &adapter->params.sge.qset[0]; struct sge_qset *qs = &adapter->sge.qs[0]; @@ -1471,7 +1468,8 @@ static int set_coalesce(struct net_devic static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c) { - struct adapter *adapter = dev->priv; + struct port_info *pi = netdev_priv(dev); + struct adapter *adapter = pi->adapter; struct qset_params *q = adapter->params.sge.qset; c->rx_coalesce_usecs = q->coalesce_usecs; @@ -1481,8 +1479,9 @@ static int get_coalesce(struct net_devic static int get_eeprom(struct net_device *dev, struct ethtool_eeprom *e, u8 * data) { + struct port_info *pi = netdev_priv(dev); + struct adapter *adapter = pi->adapter; int i, err = 0; - struct adapter *adapter = dev->priv; u8 *buf = kmalloc(EEPROMSIZE, GFP_KERNEL); if (!buf) @@ -1592,9 +1591,10 @@ static int in_range(int val, int lo, int static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) { - int ret; + struct port_info *pi = netdev_priv(dev); + struct adapter *adapter = pi->adapter; u32 cmd; - struct adapter *adapter = dev->priv; + int ret; if (copy_from_user(&cmd, useraddr, sizeof(cmd))) return -EFAULT; @@ -1923,10 +1923,10 @@ static int cxgb_extension_ioctl(struct n static int cxgb_ioctl(struct net_device *dev, struct ifreq *req, int cmd) { - int ret, mmd; - struct adapter *adapter = dev->priv; - struct port_info *pi = 
netdev_priv(dev); struct mii_ioctl_data *data = if_mii(req); + struct port_info *pi = netdev_priv(dev); + struct adapter *adapter = pi->adapter; + int ret, mmd; switch (cmd) { case SIOCGMIIPHY: @@ -1994,9 +1994,9 @@ static int cxgb_ioctl(struct net_device static int cxgb_change_mtu(struct net_device *dev, int new_mtu) { - int ret; - struct adapter *adapter = dev->priv; struct port_info *pi = netdev_priv(dev); + struct adapter *adapter = pi->adapter; + int ret; if (new_mtu < 81) /* accommodate SACK */ return -EINVAL; @@ -2013,8 +2013,8 @@ static int cxgb_change_mtu(struct net_de static int cxgb_set_mac_addr(struct net_device *dev, void *p) { - struct adapter *adapter = dev->priv; struct port_info *pi = netdev_priv(dev); + struct adapter *adapter = pi->adapter; struct sockaddr *addr = p; if (!is_valid_ether_addr(addr->sa_data)) @@ -2050,8 +2050,8 @@ static void t3_synchronize_rx(struct ada static void vlan_rx_register(struct net_device *dev, struct vlan_group *grp) { - struct adapter *adapter = dev->priv; struct port_info *pi = netdev_priv(dev); + struct adapter *adapter = pi->adapter; pi->vlan_grp = grp; if (adapter->params.rev > 0) @@ -2070,8 +2070,8 @@ static void vlan_rx_register(struct net_ #ifdef CONFIG_NET_POLL_CONTROLLER static void cxgb_netpoll(struct net_device *dev) { - struct adapter *adapter = dev->priv; struct port_info *pi = netdev_priv(dev); + struct adapter *adapter = pi->adapter; int qidx; for (qidx = pi->first_qset; qidx < pi->first_qset + pi->nqsets; qidx++) { @@ -2433,6 +2433,7 @@ static int __devinit init_one(struct pci adapter->port[i] = netdev; pi = netdev_priv(netdev); + pi->adapter = adapter; pi->rx_csum_offload = 1; pi->nqsets = 1; pi->first_qset = i; @@ -2442,7 +2443,6 @@ static int __devinit init_one(struct pci netdev->irq = pdev->irq; netdev->mem_start = mmio_start; netdev->mem_end = mmio_start + mmio_len - 1; - netdev->priv = adapter; netdev->features |= NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO; netdev->features |= NETIF_F_LLTX; if 
(pci_using_dac) @@ -2462,12 +2462,11 @@ static int __devinit init_one(struct pci #ifdef CONFIG_NET_POLL_CONTROLLER netdev->poll_controller = cxgb_netpoll; #endif - netdev->weight = 64; SET_ETHTOOL_OPS(netdev, &cxgb_ethtool_ops); } - pci_set_drvdata(pdev, adapter->port[0]); + pci_set_drvdata(pdev, adapter); if (t3_prep_adapter(adapter, ai, 1) < 0) { err = -ENODEV; goto out_free_dev; @@ -2547,11 +2546,10 @@ out_release_regions: static void __devexit remove_one(struct pci_dev *pdev) { - struct net_device *dev = pci_get_drvdata(pdev); + struct adapter *adapter = pci_get_drvdata(pdev); - if (dev) { + if (adapter) { int i; - struct adapter *adapter = dev->priv; t3_sge_stop(adapter); sysfs_remove_group(&adapter->port[0]->dev.kobj, @@ -2571,12 +2569,6 @@ static void __devexit remove_one(struct t3_free_sge_resources(adapter); cxgb_disable_msi(adapter); - for (i = 0; i < ARRAY_SIZE(adapter->dummy_netdev); i++) - if (adapter->dummy_netdev[i]) { - free_netdev(adapter->dummy_netdev[i]); - adapter->dummy_netdev[i] = NULL; - } - for_each_port(adapter, i) if (adapter->port[i]) free_netdev(adapter->port[i]); diff -puN drivers/net/cxgb3/sge.c~git-net drivers/net/cxgb3/sge.c --- a/drivers/net/cxgb3/sge.c~git-net +++ a/drivers/net/cxgb3/sge.c @@ -591,9 +591,6 @@ void t3_free_qset(struct adapter *adapte q->rspq.desc, q->rspq.phys_addr); } - if (q->netdev) - q->netdev->atalk_ptr = NULL; - memset(q, 0, sizeof(*q)); } @@ -1073,8 +1070,8 @@ int t3_eth_xmit(struct sk_buff *skb, str { unsigned int ndesc, pidx, credits, gen, compl; const struct port_info *pi = netdev_priv(dev); - struct adapter *adap = dev->priv; - struct sge_qset *qs = dev2qset(dev); + struct adapter *adap = pi->adapter; + struct sge_qset *qs = &adap->sge.qs[pi->first_qset]; struct sge_txq *q = &qs->txq[TXQ_ETH]; /* @@ -1326,7 +1323,7 @@ static void restart_ctrlq(unsigned long struct sk_buff *skb; struct sge_qset *qs = (struct sge_qset *)data; struct sge_txq *q = &qs->txq[TXQ_CTRL]; - struct adapter *adap = qs->netdev->priv; 
+ struct adapter *adap = qs->anapi->adapter; spin_lock(&q->lock); again:reclaim_completed_tx_imm(q); @@ -1531,7 +1528,7 @@ static void restart_offloadq(unsigned lo struct sk_buff *skb; struct sge_qset *qs = (struct sge_qset *)data; struct sge_txq *q = &qs->txq[TXQ_OFLD]; - struct adapter *adap = qs->netdev->priv; + struct adapter *adap = qs->anapi->adapter; spin_lock(&q->lock); again:reclaim_completed_tx(adap, q); @@ -1636,8 +1633,8 @@ static inline void offload_enqueue(struc else { struct sge_qset *qs = rspq_to_qset(q); - if (__netif_rx_schedule_prep(qs->netdev)) - __netif_rx_schedule(qs->netdev); + if (napi_schedule_prep(&qs->anapi->napi)) + __netif_rx_schedule(qs->netdev, &qs->anapi->napi); q->rx_head = skb; } q->rx_tail = skb; @@ -1673,33 +1670,32 @@ static inline void deliver_partial_bundl * receive handler. Batches need to be of modest size as we do prefetches * on the packets in each. */ -static int ofld_poll(struct net_device *dev, int *budget) +static int ofld_poll(struct napi_struct *napi, int budget) { - struct adapter *adapter = dev->priv; - struct sge_qset *qs = dev2qset(dev); + struct adapter_napi *anapi = container_of(napi, struct adapter_napi, napi); + struct adapter *adapter = anapi->adapter; + struct net_device *dev = adapter->port[anapi->port]; + struct sge_qset *qs = &adapter->sge.qs[anapi->qset]; struct sge_rspq *q = &qs->rspq; - int work_done, limit = min(*budget, dev->quota), avail = limit; + int work_done = 0; - while (avail) { + while (work_done < budget) { struct sk_buff *head, *tail, *skbs[RX_BUNDLE_SIZE]; int ngathered; spin_lock_irq(&q->lock); head = q->rx_head; if (!head) { - work_done = limit - avail; - *budget -= work_done; - dev->quota -= work_done; - __netif_rx_complete(dev); + __netif_rx_complete(dev, napi); spin_unlock_irq(&q->lock); - return 0; + return work_done; } tail = q->rx_tail; q->rx_head = q->rx_tail = NULL; spin_unlock_irq(&q->lock); - for (ngathered = 0; avail && head; avail--) { + for (ngathered = 0; work_done < 
budget && head; work_done++) { prefetch(head->data); skbs[ngathered] = head; head = head->next; @@ -1721,10 +1717,8 @@ static int ofld_poll(struct net_device * } deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered); } - work_done = limit - avail; - *budget -= work_done; - dev->quota -= work_done; - return 1; + + return work_done; } /** @@ -2073,44 +2067,45 @@ static inline int is_pure_response(const * * Handler for new data events when using NAPI. */ -static int napi_rx_handler(struct net_device *dev, int *budget) +static int napi_rx_handler(struct napi_struct *napi, int budget) { - struct adapter *adap = dev->priv; - struct sge_qset *qs = dev2qset(dev); - int effective_budget = min(*budget, dev->quota); - + struct adapter_napi *anapi = container_of(napi, struct adapter_napi, napi); + struct adapter *adap = anapi->adapter; + struct net_device *dev = adap->port[anapi->port]; + struct sge_qset *qs = &adap->sge.qs[anapi->qset]; + int effective_budget = budget; int work_done = process_responses(adap, qs, effective_budget); - *budget -= work_done; - dev->quota -= work_done; - - if (work_done >= effective_budget) - return 1; - netif_rx_complete(dev); + if (likely(work_done < effective_budget)) { + netif_rx_complete(dev, napi); - /* - * Because we don't atomically flush the following write it is - * possible that in very rare cases it can reach the device in a way - * that races with a new response being written plus an error interrupt - * causing the NAPI interrupt handler below to return unhandled status - * to the OS. To protect against this would require flushing the write - * and doing both the write and the flush with interrupts off. Way too - * expensive and unjustifiable given the rarity of the race. - * - * The race cannot happen at all with MSI-X. 
- */ - t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | - V_NEWTIMER(qs->rspq.next_holdoff) | - V_NEWINDEX(qs->rspq.cidx)); - return 0; + /* + * Because we don't atomically flush the following + * write it is possible that in very rare cases it can + * reach the device in a way that races with a new + * response being written plus an error interrupt + * causing the NAPI interrupt handler below to return + * unhandled status to the OS. To protect against + * this would require flushing the write and doing + * both the write and the flush with interrupts off. + * Way too expensive and unjustifiable given the + * rarity of the race. + * + * The race cannot happen at all with MSI-X. + */ + t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | + V_NEWTIMER(qs->rspq.next_holdoff) | + V_NEWINDEX(qs->rspq.cidx)); + } + return work_done; } /* * Returns true if the device is already scheduled for polling. */ -static inline int napi_is_scheduled(struct net_device *dev) +static inline int napi_is_scheduled(struct napi_struct *napi) { - return test_bit(__LINK_STATE_RX_SCHED, &dev->state); + return test_bit(NAPI_STATE_SCHED, &napi->state); } /** @@ -2193,8 +2188,8 @@ static inline int handle_responses(struc V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx)); return 0; } - if (likely(__netif_rx_schedule_prep(qs->netdev))) - __netif_rx_schedule(qs->netdev); + if (likely(napi_schedule_prep(&qs->anapi->napi))) + __netif_rx_schedule(qs->netdev, &qs->anapi->napi); return 1; } @@ -2205,7 +2200,7 @@ static inline int handle_responses(struc irqreturn_t t3_sge_intr_msix(int irq, void *cookie) { struct sge_qset *qs = cookie; - struct adapter *adap = qs->netdev->priv; + struct adapter *adap = qs->anapi->adapter; struct sge_rspq *q = &qs->rspq; spin_lock(&q->lock); @@ -2224,7 +2219,7 @@ irqreturn_t t3_sge_intr_msix(int irq, vo irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie) { struct sge_qset *qs = cookie; - struct adapter *adap = qs->netdev->priv; + struct adapter *adap = 
qs->anapi->adapter; struct sge_rspq *q = &qs->rspq; spin_lock(&q->lock); @@ -2272,11 +2267,14 @@ static irqreturn_t t3_intr_msi(int irq, return IRQ_HANDLED; } -static int rspq_check_napi(struct net_device *dev, struct sge_rspq *q) +static int rspq_check_napi(struct sge_qset *qs) { - if (!napi_is_scheduled(dev) && is_new_response(&q->desc[q->cidx], q)) { - if (likely(__netif_rx_schedule_prep(dev))) - __netif_rx_schedule(dev); + struct sge_rspq *q = &qs->rspq; + + if (!napi_is_scheduled(&qs->anapi->napi) && + is_new_response(&q->desc[q->cidx], q)) { + if (likely(napi_schedule_prep(&qs->anapi->napi))) + __netif_rx_schedule(qs->netdev, &qs->anapi->napi); return 1; } return 0; @@ -2297,10 +2295,9 @@ irqreturn_t t3_intr_msi_napi(int irq, vo spin_lock(&q->lock); - new_packets = rspq_check_napi(adap->sge.qs[0].netdev, q); + new_packets = rspq_check_napi(&adap->sge.qs[0]); if (adap->params.nports == 2) - new_packets += rspq_check_napi(adap->sge.qs[1].netdev, - &adap->sge.qs[1].rspq); + new_packets += rspq_check_napi(&adap->sge.qs[1]); if (!new_packets && t3_slow_intr_handler(adap) == 0) q->unhandled_irqs++; @@ -2405,7 +2402,8 @@ static irqreturn_t t3b_intr_napi(int irq u32 map; struct net_device *dev; struct adapter *adap = cookie; - struct sge_rspq *q0 = &adap->sge.qs[0].rspq; + struct sge_qset *qs0 = &adap->sge.qs[0]; + struct sge_rspq *q0 = &qs0->rspq; t3_write_reg(adap, A_PL_CLI, 0); map = t3_read_reg(adap, A_SG_DATA_INTR); @@ -2419,16 +2417,17 @@ static irqreturn_t t3b_intr_napi(int irq t3_slow_intr_handler(adap); if (likely(map & 1)) { - dev = adap->sge.qs[0].netdev; + dev = qs0->netdev; - if (likely(__netif_rx_schedule_prep(dev))) - __netif_rx_schedule(dev); + if (likely(napi_schedule_prep(&qs0->anapi->napi))) + __netif_rx_schedule(dev, &qs0->anapi->napi); } if (map & 2) { - dev = adap->sge.qs[1].netdev; + struct sge_qset *qs1 = &adap->sge.qs[1]; - if (likely(__netif_rx_schedule_prep(dev))) - __netif_rx_schedule(dev); + dev = qs1->netdev; + if 
(likely(napi_schedule_prep(&qs1->anapi->napi))) + __netif_rx_schedule(dev, &qs1->anapi->napi); } spin_unlock(&q0->lock); @@ -2508,7 +2507,7 @@ static void sge_timer_cb(unsigned long d { spinlock_t *lock; struct sge_qset *qs = (struct sge_qset *)data; - struct adapter *adap = qs->netdev->priv; + struct adapter *adap = qs->anapi->adapter; if (spin_trylock(&qs->txq[TXQ_ETH].lock)) { reclaim_completed_tx(adap, &qs->txq[TXQ_ETH]); @@ -2521,7 +2520,7 @@ static void sge_timer_cb(unsigned long d lock = (adap->flags & USING_MSIX) ? &qs->rspq.lock : &adap->sge.qs[0].rspq.lock; if (spin_trylock_irq(lock)) { - if (!napi_is_scheduled(qs->netdev)) { + if (!napi_is_scheduled(&qs->anapi->napi)) { u32 status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS); if (qs->fl[0].credits < qs->fl[0].size) @@ -2555,12 +2554,9 @@ static void sge_timer_cb(unsigned long d */ void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p) { - if (!qs->netdev) - return; - qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);/* can't be 0 */ qs->rspq.polling = p->polling; - qs->netdev->poll = p->polling ? napi_rx_handler : ofld_poll; + qs->anapi->napi.poll = p->polling ? napi_rx_handler : ofld_poll; } /** @@ -2580,7 +2576,8 @@ void t3_update_qset_coalesce(struct sge_ */ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports, int irq_vec_idx, const struct qset_params *p, - int ntxq, struct net_device *netdev) + int ntxq, struct adapter_napi *anapi, + struct net_device *dev) { int i, ret = -ENOMEM; struct sge_qset *q = &adapter->sge.qs[id]; @@ -2701,17 +2698,10 @@ int t3_sge_alloc_qset(struct adapter *ad } spin_unlock(&adapter->sge.reg_lock); - q->netdev = netdev; + q->anapi = anapi; + q->netdev = dev; t3_update_qset_coalesce(q, p); - /* - * We use atalk_ptr as a backpointer to a qset. In case a device is - * associated with multiple queue sets only the first one sets - * atalk_ptr. 
- */ - if (netdev->atalk_ptr == NULL) - netdev->atalk_ptr = q; - refill_fl(adapter, &q->fl[0], q->fl[0].size, GFP_KERNEL); refill_fl(adapter, &q->fl[1], q->fl[1].size, GFP_KERNEL); refill_rspq(adapter, &q->rspq, q->rspq.size - 1); diff -puN drivers/net/e100.c~git-net drivers/net/e100.c --- a/drivers/net/e100.c~git-net +++ a/drivers/net/e100.c @@ -539,6 +539,7 @@ struct nic { struct csr __iomem *csr; enum scb_cmd_lo cuc_cmd; unsigned int cbs_avail; + struct napi_struct napi; struct cb *cbs; struct cb *cb_to_use; struct cb *cb_to_send; @@ -1974,35 +1975,31 @@ static irqreturn_t e100_intr(int irq, vo if(stat_ack & stat_ack_rnr) nic->ru_running = RU_SUSPENDED; - if(likely(netif_rx_schedule_prep(netdev))) { + if(likely(netif_rx_schedule_prep(netdev, &nic->napi))) { e100_disable_irq(nic); - __netif_rx_schedule(netdev); + __netif_rx_schedule(netdev, &nic->napi); } return IRQ_HANDLED; } -static int e100_poll(struct net_device *netdev, int *budget) +static int e100_poll(struct napi_struct *napi, int budget) { - struct nic *nic = netdev_priv(netdev); - unsigned int work_to_do = min(netdev->quota, *budget); - unsigned int work_done = 0; + struct nic *nic = container_of(napi, struct nic, napi); + struct net_device *netdev = nic->netdev; + int work_done = 0; int tx_cleaned; - e100_rx_clean(nic, &work_done, work_to_do); + e100_rx_clean(nic, &work_done, budget); tx_cleaned = e100_tx_clean(nic); /* If no Rx and Tx cleanup work was done, exit polling mode. 
*/ if((!tx_cleaned && (work_done == 0)) || !netif_running(netdev)) { - netif_rx_complete(netdev); + netif_rx_complete(netdev, napi); e100_enable_irq(nic); - return 0; } - *budget -= work_done; - netdev->quota -= work_done; - - return 1; + return work_done; } #ifdef CONFIG_NET_POLL_CONTROLLER @@ -2071,7 +2068,7 @@ static int e100_up(struct nic *nic) nic->netdev->name, nic->netdev))) goto err_no_irq; netif_wake_queue(nic->netdev); - netif_poll_enable(nic->netdev); + napi_enable(&nic->napi); /* enable ints _after_ enabling poll, preventing a race between * disable ints+schedule */ e100_enable_irq(nic); @@ -2089,7 +2086,7 @@ err_rx_clean_list: static void e100_down(struct nic *nic) { /* wait here for poll to complete */ - netif_poll_disable(nic->netdev); + napi_disable(&nic->napi); netif_stop_queue(nic->netdev); e100_hw_reset(nic); free_irq(nic->pdev->irq, nic->netdev); @@ -2572,14 +2569,13 @@ static int __devinit e100_probe(struct p SET_ETHTOOL_OPS(netdev, &e100_ethtool_ops); netdev->tx_timeout = e100_tx_timeout; netdev->watchdog_timeo = E100_WATCHDOG_PERIOD; - netdev->poll = e100_poll; - netdev->weight = E100_NAPI_WEIGHT; #ifdef CONFIG_NET_POLL_CONTROLLER netdev->poll_controller = e100_netpoll; #endif strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); nic = netdev_priv(netdev); + netif_napi_add(netdev, &nic->napi, e100_poll, E100_NAPI_WEIGHT); nic->netdev = netdev; nic->pdev = pdev; nic->msg_enable = (1 << debug) - 1; @@ -2733,7 +2729,7 @@ static int e100_suspend(struct pci_dev * struct nic *nic = netdev_priv(netdev); if (netif_running(netdev)) - netif_poll_disable(nic->netdev); + napi_disable(&nic->napi); del_timer_sync(&nic->watchdog); netif_carrier_off(nic->netdev); netif_device_detach(netdev); @@ -2779,7 +2775,7 @@ static void e100_shutdown(struct pci_dev struct nic *nic = netdev_priv(netdev); if (netif_running(netdev)) - netif_poll_disable(nic->netdev); + napi_disable(&nic->napi); del_timer_sync(&nic->watchdog); netif_carrier_off(nic->netdev); @@ 
-2804,12 +2800,13 @@ static void e100_shutdown(struct pci_dev static pci_ers_result_t e100_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state) { struct net_device *netdev = pci_get_drvdata(pdev); + struct nic *nic = netdev_priv(netdev); /* Similar to calling e100_down(), but avoids adpater I/O. */ netdev->stop(netdev); /* Detach; put netif into state similar to hotplug unplug. */ - netif_poll_enable(netdev); + napi_enable(&nic->napi); netif_device_detach(netdev); pci_disable_device(pdev); diff -puN drivers/net/e1000/e1000.h~git-net drivers/net/e1000/e1000.h --- a/drivers/net/e1000/e1000.h~git-net +++ a/drivers/net/e1000/e1000.h @@ -300,6 +300,7 @@ struct e1000_adapter { int cleaned_count); struct e1000_rx_ring *rx_ring; /* One per active queue */ #ifdef CONFIG_E1000_NAPI + struct napi_struct napi; struct net_device *polling_netdev; /* One per active queue */ #endif int num_tx_queues; diff -puN drivers/net/e1000/e1000_main.c~git-net drivers/net/e1000/e1000_main.c --- a/drivers/net/e1000/e1000_main.c~git-net +++ a/drivers/net/e1000/e1000_main.c @@ -165,7 +165,7 @@ static irqreturn_t e1000_intr_msi(int ir static boolean_t e1000_clean_tx_irq(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring); #ifdef CONFIG_E1000_NAPI -static int e1000_clean(struct net_device *poll_dev, int *budget); +static int e1000_clean(struct napi_struct *napi, int budget); static boolean_t e1000_clean_rx_irq(struct e1000_adapter *adapter, struct e1000_rx_ring *rx_ring, int *work_done, int work_to_do); @@ -544,7 +544,7 @@ int e1000_up(struct e1000_adapter *adapt clear_bit(__E1000_DOWN, &adapter->flags); #ifdef CONFIG_E1000_NAPI - netif_poll_enable(adapter->netdev); + napi_enable(&adapter->napi); #endif e1000_irq_enable(adapter); @@ -633,7 +633,7 @@ e1000_down(struct e1000_adapter *adapter set_bit(__E1000_DOWN, &adapter->flags); #ifdef CONFIG_E1000_NAPI - netif_poll_disable(netdev); + napi_disable(&adapter->napi); #endif e1000_irq_disable(adapter); @@ -935,8 +935,7 @@ 
e1000_probe(struct pci_dev *pdev, netdev->tx_timeout = &e1000_tx_timeout; netdev->watchdog_timeo = 5 * HZ; #ifdef CONFIG_E1000_NAPI - netdev->poll = &e1000_clean; - netdev->weight = 64; + netif_napi_add(netdev, &adapter->napi, e1000_clean, 64); #endif netdev->vlan_rx_register = e1000_vlan_rx_register; netdev->vlan_rx_add_vid = e1000_vlan_rx_add_vid; @@ -1149,9 +1148,6 @@ e1000_probe(struct pci_dev *pdev, /* tell the stack to leave us alone until e1000_open() is called */ netif_carrier_off(netdev); netif_stop_queue(netdev); -#ifdef CONFIG_E1000_NAPI - netif_poll_disable(netdev); -#endif strcpy(netdev->name, "eth%d"); if ((err = register_netdev(netdev))) @@ -1220,12 +1216,13 @@ e1000_remove(struct pci_dev *pdev) * would have already happened in close and is redundant. */ e1000_release_hw_control(adapter); - unregister_netdev(netdev); #ifdef CONFIG_E1000_NAPI for (i = 0; i < adapter->num_rx_queues; i++) dev_put(&adapter->polling_netdev[i]); #endif + unregister_netdev(netdev); + if (!e1000_check_phy_reset_block(&adapter->hw)) e1000_phy_hw_reset(&adapter->hw); @@ -1323,8 +1320,6 @@ e1000_sw_init(struct e1000_adapter *adap #ifdef CONFIG_E1000_NAPI for (i = 0; i < adapter->num_rx_queues; i++) { adapter->polling_netdev[i].priv = adapter; - adapter->polling_netdev[i].poll = &e1000_clean; - adapter->polling_netdev[i].weight = 64; dev_hold(&adapter->polling_netdev[i]); set_bit(__LINK_STATE_START, &adapter->polling_netdev[i].state); } @@ -1441,7 +1436,7 @@ e1000_open(struct net_device *netdev) clear_bit(__E1000_DOWN, &adapter->flags); #ifdef CONFIG_E1000_NAPI - netif_poll_enable(netdev); + napi_enable(&adapter->napi); #endif e1000_irq_enable(adapter); @@ -3784,12 +3779,12 @@ e1000_intr_msi(int irq, void *data) } #ifdef CONFIG_E1000_NAPI - if (likely(netif_rx_schedule_prep(netdev))) { + if (likely(netif_rx_schedule_prep(netdev, &adapter->napi))) { adapter->total_tx_bytes = 0; adapter->total_tx_packets = 0; adapter->total_rx_bytes = 0; adapter->total_rx_packets = 0; - 
__netif_rx_schedule(netdev); + __netif_rx_schedule(netdev, &adapter->napi); } else e1000_irq_enable(adapter); #else @@ -3869,12 +3864,12 @@ e1000_intr(int irq, void *data) E1000_WRITE_REG(hw, IMC, ~0); E1000_WRITE_FLUSH(hw); } - if (likely(netif_rx_schedule_prep(netdev))) { + if (likely(netif_rx_schedule_prep(netdev, &adapter->napi))) { adapter->total_tx_bytes = 0; adapter->total_tx_packets = 0; adapter->total_rx_bytes = 0; adapter->total_rx_packets = 0; - __netif_rx_schedule(netdev); + __netif_rx_schedule(netdev, &adapter->napi); } else /* this really should not happen! if it does it is basically a * bug, but not a hard error, so enable ints and continue */ @@ -3922,10 +3917,10 @@ e1000_intr(int irq, void *data) **/ static int -e1000_clean(struct net_device *poll_dev, int *budget) +e1000_clean(struct napi_struct *napi, int budget) { - struct e1000_adapter *adapter; - int work_to_do = min(*budget, poll_dev->quota); + struct e1000_adapter *adapter = container_of(napi, struct e1000_adapter, napi); + struct net_device *poll_dev = adapter->netdev; int tx_cleaned = 0, work_done = 0; /* Must NOT use netdev_priv macro here. 
*/ @@ -3946,23 +3941,19 @@ e1000_clean(struct net_device *poll_dev, } adapter->clean_rx(adapter, &adapter->rx_ring[0], - &work_done, work_to_do); - - *budget -= work_done; - poll_dev->quota -= work_done; + &work_done, budget); /* If no Tx and not enough Rx work done, exit the polling mode */ - if ((!tx_cleaned && (work_done == 0)) || + if ((!tx_cleaned && (work_done < budget)) || !netif_running(poll_dev)) { quit_polling: if (likely(adapter->itr_setting & 3)) e1000_set_itr(adapter); - netif_rx_complete(poll_dev); + netif_rx_complete(poll_dev, napi); e1000_irq_enable(adapter); - return 0; } - return 1; + return work_done; } #endif diff -puN drivers/net/ehea/ehea.h~git-net drivers/net/ehea/ehea.h --- a/drivers/net/ehea/ehea.h~git-net +++ a/drivers/net/ehea/ehea.h @@ -33,13 +33,14 @@ #include #include #include +#include #include #include #include #define DRV_NAME "ehea" -#define DRV_VERSION "EHEA_0073" +#define DRV_VERSION "EHEA_0074" /* eHEA capability flags */ #define DLPAR_PORT_ADD_REM 1 @@ -58,6 +59,7 @@ #define EHEA_SMALL_QUEUES #define EHEA_NUM_TX_QP 1 +#define EHEA_LRO_MAX_AGGR 64 #ifdef EHEA_SMALL_QUEUES #define EHEA_MAX_CQE_COUNT 1023 @@ -84,6 +86,8 @@ #define EHEA_RQ2_PKT_SIZE 1522 #define EHEA_L_PKT_SIZE 256 /* low latency */ +#define MAX_LRO_DESCRIPTORS 8 + /* Send completion signaling */ /* Protection Domain Identifier */ @@ -351,6 +355,7 @@ struct ehea_q_skb_arr { * Port resources */ struct ehea_port_res { + struct napi_struct napi; struct port_stats p_stats; struct ehea_mr send_mr; /* send memory region */ struct ehea_mr recv_mr; /* receive memory region */ @@ -362,7 +367,6 @@ struct ehea_port_res { struct ehea_cq *send_cq; struct ehea_cq *recv_cq; struct ehea_eq *eq; - struct net_device *d_netdev; struct ehea_q_skb_arr rq1_skba; struct ehea_q_skb_arr rq2_skba; struct ehea_q_skb_arr rq3_skba; @@ -376,6 +380,8 @@ struct ehea_port_res { u64 tx_packets; u64 rx_packets; u32 poll_counter; + struct net_lro_mgr lro_mgr; + struct net_lro_desc 
lro_desc[MAX_LRO_DESCRIPTORS]; }; @@ -427,6 +433,7 @@ struct ehea_port { u32 msg_enable; u32 sig_comp_iv; u32 state; + u32 lro_max_aggr; u8 full_duplex; u8 autoneg; u8 num_def_qps; diff -puN drivers/net/ehea/ehea_ethtool.c~git-net drivers/net/ehea/ehea_ethtool.c --- a/drivers/net/ehea/ehea_ethtool.c~git-net +++ a/drivers/net/ehea/ehea_ethtool.c @@ -183,6 +183,9 @@ static char ehea_ethtool_stats_keys[][ET {"PR5 free_swqes"}, {"PR6 free_swqes"}, {"PR7 free_swqes"}, + {"LRO aggregated"}, + {"LRO flushed"}, + {"LRO no_desc"}, }; static void ehea_get_strings(struct net_device *dev, u32 stringset, u8 *data) @@ -239,6 +242,18 @@ static void ehea_get_ethtool_stats(struc for (k = 0; k < 8; k++) data[i++] = atomic_read(&port->port_res[k].swqe_avail); + for (k = 0, tmp = 0; k < EHEA_MAX_PORT_RES; k++) + tmp += port->port_res[k].lro_mgr.stats.aggregated; + data[i++] = tmp; + + for (k = 0, tmp = 0; k < EHEA_MAX_PORT_RES; k++) + tmp += port->port_res[k].lro_mgr.stats.flushed; + data[i++] = tmp; + + for (k = 0, tmp = 0; k < EHEA_MAX_PORT_RES; k++) + tmp += port->port_res[k].lro_mgr.stats.no_desc; + data[i++] = tmp; + } const struct ethtool_ops ehea_ethtool_ops = { diff -puN drivers/net/ehea/ehea_main.c~git-net drivers/net/ehea/ehea_main.c --- a/drivers/net/ehea/ehea_main.c~git-net +++ a/drivers/net/ehea/ehea_main.c @@ -52,6 +52,8 @@ static int rq2_entries = EHEA_DEF_ENTRIE static int rq3_entries = EHEA_DEF_ENTRIES_RQ3; static int sq_entries = EHEA_DEF_ENTRIES_SQ; static int use_mcs = 0; +static int use_lro = 0; +static int lro_max_aggr = EHEA_LRO_MAX_AGGR; static int num_tx_qps = EHEA_NUM_TX_QP; module_param(msg_level, int, 0); @@ -60,6 +62,8 @@ module_param(rq2_entries, int, 0); module_param(rq3_entries, int, 0); module_param(sq_entries, int, 0); module_param(use_mcs, int, 0); +module_param(use_lro, int, 0); +module_param(lro_max_aggr, int, 0); module_param(num_tx_qps, int, 0); MODULE_PARM_DESC(num_tx_qps, "Number of TX-QPS"); @@ -78,6 +82,11 @@ MODULE_PARM_DESC(sq_entries, " 
Number of __MODULE_STRING(EHEA_DEF_ENTRIES_SQ) ")"); MODULE_PARM_DESC(use_mcs, " 0:NAPI, 1:Multiple receive queues, Default = 0 "); +MODULE_PARM_DESC(lro_max_aggr, " LRO: Max packets to be aggregated. Default = " + __MODULE_STRING(EHEA_LRO_MAX_AGGR)); +MODULE_PARM_DESC(use_lro, " Large Receive Offload, 1: enable, 0: disable, " + "Default = 0"); + static int port_name_cnt = 0; static LIST_HEAD(adapter_list); u64 ehea_driver_flags = 0; @@ -389,9 +398,63 @@ static int ehea_treat_poll_error(struct return 0; } -static struct ehea_cqe *ehea_proc_rwqes(struct net_device *dev, - struct ehea_port_res *pr, - int *budget) +static int get_skb_hdr(struct sk_buff *skb, void **iphdr, + void **tcph, u64 *hdr_flags, void *priv) +{ + struct ehea_cqe *cqe = priv; + unsigned int ip_len; + struct iphdr *iph; + + /* non tcp/udp packets */ + if (!cqe->header_length) + return -1; + + /* non tcp packet */ + skb_reset_network_header(skb); + iph = ip_hdr(skb); + if (iph->protocol != IPPROTO_TCP) + return -1; + + ip_len = ip_hdrlen(skb); + skb_set_transport_header(skb, ip_len); + *tcph = tcp_hdr(skb); + + /* check if ip header and tcp header are complete */ + if (iph->tot_len < ip_len + tcp_hdrlen(skb)) + return -1; + + *hdr_flags = LRO_IPV4 | LRO_TCP; + *iphdr = iph; + + return 0; +} + +static void ehea_proc_skb(struct ehea_port_res *pr, struct ehea_cqe *cqe, + struct sk_buff *skb) +{ + int vlan_extracted = (cqe->status & EHEA_CQE_VLAN_TAG_XTRACT) + && pr->port->vgrp; + + if (use_lro) { + if (vlan_extracted) + lro_vlan_hwaccel_receive_skb(&pr->lro_mgr, skb, + pr->port->vgrp, + cqe->vlan_tag, + cqe); + else + lro_receive_skb(&pr->lro_mgr, skb, cqe); + } else { + if (vlan_extracted) + vlan_hwaccel_receive_skb(skb, pr->port->vgrp, + cqe->vlan_tag); + else + netif_receive_skb(skb); + } +} + +static int ehea_proc_rwqes(struct net_device *dev, + struct ehea_port_res *pr, + int budget) { struct ehea_port *port = pr->port; struct ehea_qp *qp = pr->qp; @@ -404,18 +467,16 @@ static struct ehea_cqe 
*ehea_proc_rwqes( int skb_arr_rq2_len = pr->rq2_skba.len; int skb_arr_rq3_len = pr->rq3_skba.len; int processed, processed_rq1, processed_rq2, processed_rq3; - int wqe_index, last_wqe_index, rq, my_quota, port_reset; + int wqe_index, last_wqe_index, rq, port_reset; processed = processed_rq1 = processed_rq2 = processed_rq3 = 0; last_wqe_index = 0; - my_quota = min(*budget, dev->quota); cqe = ehea_poll_rq1(qp, &wqe_index); - while ((my_quota > 0) && cqe) { + while ((processed < budget) && cqe) { ehea_inc_rq1(qp); processed_rq1++; processed++; - my_quota--; if (netif_msg_rx_status(port)) ehea_dump(cqe, sizeof(*cqe), "CQE"); @@ -430,14 +491,14 @@ static struct ehea_cqe *ehea_proc_rwqes( if (netif_msg_rx_err(port)) ehea_error("LL rq1: skb=NULL"); - skb = netdev_alloc_skb(port->netdev, + skb = netdev_alloc_skb(dev, EHEA_L_PKT_SIZE); if (!skb) break; } skb_copy_to_linear_data(skb, ((char*)cqe) + 64, cqe->num_bytes_transfered - 4); - ehea_fill_skb(port->netdev, skb, cqe); + ehea_fill_skb(dev, skb, cqe); } else if (rq == 2) { /* RQ2 */ skb = get_skb_by_index(skb_arr_rq2, skb_arr_rq2_len, cqe); @@ -446,7 +507,7 @@ static struct ehea_cqe *ehea_proc_rwqes( ehea_error("rq2: skb=NULL"); break; } - ehea_fill_skb(port->netdev, skb, cqe); + ehea_fill_skb(dev, skb, cqe); processed_rq2++; } else { /* RQ3 */ skb = get_skb_by_index(skb_arr_rq3, @@ -456,17 +517,11 @@ static struct ehea_cqe *ehea_proc_rwqes( ehea_error("rq3: skb=NULL"); break; } - ehea_fill_skb(port->netdev, skb, cqe); + ehea_fill_skb(dev, skb, cqe); processed_rq3++; } - if ((cqe->status & EHEA_CQE_VLAN_TAG_XTRACT) - && port->vgrp) - vlan_hwaccel_receive_skb(skb, port->vgrp, - cqe->vlan_tag); - else - netif_receive_skb(skb); - + ehea_proc_skb(pr, cqe, skb); dev->last_rx = jiffies; } else { pr->p_stats.poll_receive_errors++; @@ -478,16 +533,16 @@ static struct ehea_cqe *ehea_proc_rwqes( } cqe = ehea_poll_rq1(qp, &wqe_index); } + if (use_lro) + lro_flush_all(&pr->lro_mgr); pr->rx_packets += processed; - *budget -= 
processed; ehea_refill_rq1(pr, last_wqe_index, processed_rq1); ehea_refill_rq2(pr, processed_rq2); ehea_refill_rq3(pr, processed_rq3); - cqe = ehea_poll_rq1(qp, &wqe_index); - return cqe; + return processed; } static struct ehea_cqe *ehea_proc_cqes(struct ehea_port_res *pr, int my_quota) @@ -551,12 +606,13 @@ static struct ehea_cqe *ehea_proc_cqes(s #define EHEA_NAPI_POLL_NUM_BEFORE_IRQ 16 -static int ehea_poll(struct net_device *dev, int *budget) +static int ehea_poll(struct napi_struct *napi, int budget) { - struct ehea_port_res *pr = dev->priv; + struct ehea_port_res *pr = container_of(napi, struct ehea_port_res, napi); + struct net_device *dev = pr->port->netdev; struct ehea_cqe *cqe; struct ehea_cqe *cqe_skb = NULL; - int force_irq, wqe_index; + int force_irq, wqe_index, rx; cqe = ehea_poll_rq1(pr->qp, &wqe_index); cqe_skb = ehea_poll_cq(pr->send_cq); @@ -565,7 +621,7 @@ static int ehea_poll(struct net_device * if ((!cqe && !cqe_skb) || force_irq) { pr->poll_counter = 0; - netif_rx_complete(dev); + netif_rx_complete(dev, napi); ehea_reset_cq_ep(pr->recv_cq); ehea_reset_cq_ep(pr->send_cq); ehea_reset_cq_n1(pr->recv_cq); @@ -576,41 +632,36 @@ static int ehea_poll(struct net_device * if (!cqe && !cqe_skb) return 0; - if (!netif_rx_reschedule(dev, dev->quota)) + if (!netif_rx_reschedule(napi)) return 0; } - cqe = ehea_proc_rwqes(dev, pr, budget); + rx = ehea_proc_rwqes(dev, pr, budget); + cqe = ehea_poll_rq1(pr->qp, &wqe_index); cqe_skb = ehea_proc_cqes(pr, 300); if (cqe || cqe_skb) pr->poll_counter++; - return 1; + return rx; } #ifdef CONFIG_NET_POLL_CONTROLLER static void ehea_netpoll(struct net_device *dev) { struct ehea_port *port = netdev_priv(dev); + int i; - netif_rx_schedule(port->port_res[0].d_netdev); + for (i = 0; i < port->num_def_qps; i++) + netif_rx_schedule(dev, &port->port_res[i].napi); } #endif -static int ehea_poll_firstqueue(struct net_device *dev, int *budget) -{ - struct ehea_port *port = netdev_priv(dev); - struct net_device *d_dev = 
port->port_res[0].d_netdev; - - return ehea_poll(d_dev, budget); -} - static irqreturn_t ehea_recv_irq_handler(int irq, void *param) { struct ehea_port_res *pr = param; - netif_rx_schedule(pr->d_netdev); + netif_rx_schedule(pr->port->netdev, &pr->napi); return IRQ_HANDLED; } @@ -1224,14 +1275,16 @@ static int ehea_init_port_res(struct ehe kfree(init_attr); - pr->d_netdev = alloc_netdev(0, "", ether_setup); - if (!pr->d_netdev) - goto out_free; - pr->d_netdev->priv = pr; - pr->d_netdev->weight = 64; - pr->d_netdev->poll = ehea_poll; - set_bit(__LINK_STATE_START, &pr->d_netdev->state); - strcpy(pr->d_netdev->name, port->netdev->name); + netif_napi_add(pr->port->netdev, &pr->napi, ehea_poll, 64); + + pr->lro_mgr.max_aggr = pr->port->lro_max_aggr; + pr->lro_mgr.max_desc = MAX_LRO_DESCRIPTORS; + pr->lro_mgr.lro_arr = pr->lro_desc; + pr->lro_mgr.get_skb_header = get_skb_hdr; + pr->lro_mgr.features = LRO_F_NAPI | LRO_F_EXTRACT_VLAN_ID; + pr->lro_mgr.dev = port->netdev; + pr->lro_mgr.ip_summed = CHECKSUM_UNNECESSARY; + pr->lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY; ret = 0; goto out; @@ -1254,8 +1307,6 @@ static int ehea_clean_portres(struct ehe { int ret, i; - free_netdev(pr->d_netdev); - ret = ehea_destroy_qp(pr->qp); if (!ret) { @@ -2247,17 +2298,35 @@ static int ehea_open(struct net_device * ehea_info("enabling port %s", dev->name); ret = ehea_up(dev); - if (!ret) + if (!ret) { + port_napi_enable(port); netif_start_queue(dev); + } up(&port->port_lock); return ret; } +static void port_napi_disable(struct ehea_port *port) +{ + int i; + + for (i = 0; i < port->num_def_qps; i++) + napi_disable(&port->port_res[i].napi); +} + +static void port_napi_enable(struct ehea_port *port) +{ + int i; + + for (i = 0; i < port->num_def_qps; i++) + napi_enable(&port->port_res[i].napi); +} + static int ehea_down(struct net_device *dev) { - int ret, i; + int ret; struct ehea_port *port = netdev_priv(dev); if (port->state == EHEA_PORT_DOWN) @@ -2266,10 +2335,7 @@ static int 
ehea_down(struct net_device * ehea_drop_multicast_list(dev); ehea_free_interrupts(dev); - for (i = 0; i < port->num_def_qps; i++) - while (test_bit(__LINK_STATE_RX_SCHED, - &port->port_res[i].d_netdev->state)) - msleep(1); + port_napi_disable(port); port->state = EHEA_PORT_DOWN; @@ -2307,7 +2373,8 @@ static void ehea_reset_port(struct work_ port->resets++; down(&port->port_lock); netif_stop_queue(dev); - netif_poll_disable(dev); + + port_napi_disable(port); ehea_down(dev); @@ -2318,7 +2385,8 @@ static void ehea_reset_port(struct work_ if (netif_msg_timer(port)) ehea_info("Device %s resetted successfully", dev->name); - netif_poll_enable(dev); + port_napi_enable(port); + netif_wake_queue(dev); out: up(&port->port_lock); @@ -2346,7 +2414,9 @@ static void ehea_rereg_mrs(struct work_s dev->name); down(&port->port_lock); netif_stop_queue(dev); - netif_poll_disable(dev); + + port_napi_disable(port); + ehea_down(dev); up(&port->port_lock); } @@ -2394,7 +2464,7 @@ static void ehea_rereg_mrs(struct work_s ret = ehea_up(dev); if (!ret) { - netif_poll_enable(dev); + port_napi_enable(port); netif_wake_queue(dev); } @@ -2632,11 +2702,9 @@ struct ehea_port *ehea_setup_single_port memcpy(dev->dev_addr, &port->mac_addr, ETH_ALEN); dev->open = ehea_open; - dev->poll = ehea_poll_firstqueue; #ifdef CONFIG_NET_POLL_CONTROLLER dev->poll_controller = ehea_netpoll; #endif - dev->weight = 64; dev->stop = ehea_stop; dev->hard_start_xmit = ehea_start_xmit; dev->get_stats = ehea_get_stats; @@ -2669,6 +2737,8 @@ struct ehea_port *ehea_setup_single_port goto out_dereg_bc; } + port->lro_max_aggr = lro_max_aggr; + ret = ehea_get_jumboframe_status(port, &jumbo); if (ret) ehea_error("failed determining jumbo frame status for %s", diff -puN drivers/net/epic100.c~git-net drivers/net/epic100.c --- a/drivers/net/epic100.c~git-net +++ a/drivers/net/epic100.c @@ -262,6 +262,7 @@ struct epic_private { /* Ring pointers. */ spinlock_t lock; /* Group with Tx control cache line. 
*/ spinlock_t napi_lock; + struct napi_struct napi; unsigned int reschedule_in_poll; unsigned int cur_tx, dirty_tx; @@ -294,7 +295,7 @@ static void epic_tx_timeout(struct net_d static void epic_init_ring(struct net_device *dev); static int epic_start_xmit(struct sk_buff *skb, struct net_device *dev); static int epic_rx(struct net_device *dev, int budget); -static int epic_poll(struct net_device *dev, int *budget); +static int epic_poll(struct napi_struct *napi, int budget); static irqreturn_t epic_interrupt(int irq, void *dev_instance); static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static const struct ethtool_ops netdev_ethtool_ops; @@ -487,8 +488,7 @@ static int __devinit epic_init_one (stru dev->ethtool_ops = &netdev_ethtool_ops; dev->watchdog_timeo = TX_TIMEOUT; dev->tx_timeout = &epic_tx_timeout; - dev->poll = epic_poll; - dev->weight = 64; + netif_napi_add(dev, &ep->napi, epic_poll, 64); ret = register_netdev(dev); if (ret < 0) @@ -660,8 +660,11 @@ static int epic_open(struct net_device * /* Soft reset the chip. 
*/ outl(0x4001, ioaddr + GENCTL); - if ((retval = request_irq(dev->irq, &epic_interrupt, IRQF_SHARED, dev->name, dev))) + napi_enable(&ep->napi); + if ((retval = request_irq(dev->irq, &epic_interrupt, IRQF_SHARED, dev->name, dev))) { + napi_disable(&ep->napi); return retval; + } epic_init_ring(dev); @@ -1103,9 +1106,9 @@ static irqreturn_t epic_interrupt(int ir if ((status & EpicNapiEvent) && !ep->reschedule_in_poll) { spin_lock(&ep->napi_lock); - if (netif_rx_schedule_prep(dev)) { + if (netif_rx_schedule_prep(dev, &ep->napi)) { epic_napi_irq_off(dev, ep); - __netif_rx_schedule(dev); + __netif_rx_schedule(dev, &ep->napi); } else ep->reschedule_in_poll++; spin_unlock(&ep->napi_lock); @@ -1257,26 +1260,22 @@ static void epic_rx_err(struct net_devic outw(RxQueued, ioaddr + COMMAND); } -static int epic_poll(struct net_device *dev, int *budget) +static int epic_poll(struct napi_struct *napi, int budget) { - struct epic_private *ep = dev->priv; - int work_done = 0, orig_budget; + struct epic_private *ep = container_of(napi, struct epic_private, napi); + struct net_device *dev = ep->mii.dev; + int work_done = 0; long ioaddr = dev->base_addr; - orig_budget = (*budget > dev->quota) ? 
dev->quota : *budget; - rx_action: epic_tx(dev, ep); - work_done += epic_rx(dev, *budget); + work_done += epic_rx(dev, budget); epic_rx_err(dev, ep); - *budget -= work_done; - dev->quota -= work_done; - - if (netif_running(dev) && (work_done < orig_budget)) { + if (netif_running(dev) && (work_done < budget)) { unsigned long flags; int more; @@ -1286,7 +1285,7 @@ rx_action: more = ep->reschedule_in_poll; if (!more) { - __netif_rx_complete(dev); + __netif_rx_complete(dev, napi); outl(EpicNapiEvent, ioaddr + INTSTAT); epic_napi_irq_on(dev, ep); } else @@ -1298,7 +1297,7 @@ rx_action: goto rx_action; } - return (work_done >= orig_budget); + return work_done; } static int epic_close(struct net_device *dev) @@ -1309,6 +1308,7 @@ static int epic_close(struct net_device int i; netif_stop_queue(dev); + napi_disable(&ep->napi); if (debug > 1) printk(KERN_DEBUG "%s: Shutting down ethercard, status was %2.2x.\n", diff -puN drivers/net/fec_8xx/fec_8xx.h~git-net drivers/net/fec_8xx/fec_8xx.h --- a/drivers/net/fec_8xx/fec_8xx.h~git-net +++ a/drivers/net/fec_8xx/fec_8xx.h @@ -105,6 +105,8 @@ struct fec; struct fec_enet_private { spinlock_t lock; /* during all ops except TX pckt processing */ spinlock_t tx_lock; /* during fec_start_xmit and fec_tx */ + struct net_device *dev; + struct napi_struct napi; int fecno; struct fec *fecp; const struct fec_platform_info *fpi; diff -puN drivers/net/fec_8xx/fec_main.c~git-net drivers/net/fec_8xx/fec_main.c --- a/drivers/net/fec_8xx/fec_main.c~git-net +++ a/drivers/net/fec_8xx/fec_main.c @@ -465,9 +465,9 @@ void fec_stop(struct net_device *dev) } /* common receive function */ -static int fec_enet_rx_common(struct net_device *dev, int *budget) +static int fec_enet_rx_common(struct fec_enet_private *fep, + struct net_device *dev, int budget) { - struct fec_enet_private *fep = netdev_priv(dev); fec_t *fecp = fep->fecp; const struct fec_platform_info *fpi = fep->fpi; cbd_t *bdp; @@ -475,11 +475,8 @@ static int fec_enet_rx_common(struct net int
received = 0; __u16 pkt_len, sc; int curidx; - int rx_work_limit; if (fpi->use_napi) { - rx_work_limit = min(dev->quota, *budget); - if (!netif_running(dev)) return 0; } @@ -530,11 +527,6 @@ static int fec_enet_rx_common(struct net BUG_ON(skbn == NULL); } else { - - /* napi, got packet but no quota */ - if (fpi->use_napi && --rx_work_limit < 0) - break; - skb = fep->rx_skbuff[curidx]; BUG_ON(skb == NULL); @@ -599,25 +591,24 @@ static int fec_enet_rx_common(struct net * able to keep up at the expense of system resources. */ FW(fecp, r_des_active, 0x01000000); + + if (received >= budget) + break; + } fep->cur_rx = bdp; if (fpi->use_napi) { - dev->quota -= received; - *budget -= received; - - if (rx_work_limit < 0) - return 1; /* not done */ + if (received < budget) { + netif_rx_complete(dev, &fep->napi); - /* done */ - netif_rx_complete(dev); - - /* enable RX interrupt bits */ - FS(fecp, imask, FEC_ENET_RXF | FEC_ENET_RXB); + /* enable RX interrupt bits */ + FS(fecp, imask, FEC_ENET_RXF | FEC_ENET_RXB); + } } - return 0; + return received; } static void fec_enet_tx(struct net_device *dev) @@ -743,12 +734,12 @@ fec_enet_interrupt(int irq, void *dev_id if ((int_events & FEC_ENET_RXF) != 0) { if (!fpi->use_napi) - fec_enet_rx_common(dev, NULL); + fec_enet_rx_common(fep, dev, INT_MAX); else { - if (netif_rx_schedule_prep(dev)) { + if (netif_rx_schedule_prep(dev, &fep->napi)) { /* disable rx interrupts */ FC(fecp, imask, FEC_ENET_RXF | FEC_ENET_RXB); - __netif_rx_schedule(dev); + __netif_rx_schedule(dev, &fep->napi); } else { printk(KERN_ERR DRV_MODULE_NAME ": %s driver bug! interrupt while in poll!\n", @@ -893,10 +884,13 @@ static int fec_enet_open(struct net_devi const struct fec_platform_info *fpi = fep->fpi; unsigned long flags; + napi_enable(&fep->napi); + /* Install our interrupt handler. 
*/ if (request_irq(fpi->fec_irq, fec_enet_interrupt, 0, "fec", dev) != 0) { printk(KERN_ERR DRV_MODULE_NAME ": %s Could not allocate FEC IRQ!", dev->name); + napi_disable(&fep->napi); return -EINVAL; } @@ -907,6 +901,7 @@ static int fec_enet_open(struct net_devi printk(KERN_ERR DRV_MODULE_NAME ": %s Could not allocate PHY IRQ!", dev->name); free_irq(fpi->fec_irq, dev); + napi_disable(&fep->napi); return -EINVAL; } @@ -932,6 +927,7 @@ static int fec_enet_close(struct net_dev unsigned long flags; netif_stop_queue(dev); + napi_disable(&fep->napi); netif_carrier_off(dev); if (fpi->use_mdio) @@ -955,9 +951,12 @@ static struct net_device_stats *fec_enet return &fep->stats; } -static int fec_enet_poll(struct net_device *dev, int *budget) +static int fec_enet_poll(struct napi_struct *napi, int budget) { - return fec_enet_rx_common(dev, budget); + struct fec_enet_private *fep = container_of(napi, struct fec_enet_private, napi); + struct net_device *dev = fep->dev; + + return fec_enet_rx_common(fep, dev, budget); } /*************************************************************************/ @@ -1107,6 +1106,7 @@ int fec_8xx_init_one(const struct fec_pl SET_MODULE_OWNER(dev); fep = netdev_priv(dev); + fep->dev = dev; /* partial reset of FEC */ fec_whack_reset(fecp); @@ -1172,10 +1172,9 @@ int fec_8xx_init_one(const struct fec_pl dev->get_stats = fec_enet_get_stats; dev->set_multicast_list = fec_set_multicast_list; dev->set_mac_address = fec_set_mac_address; - if (fpi->use_napi) { - dev->poll = fec_enet_poll; - dev->weight = fpi->napi_weight; - } + netif_napi_add(dev, &fep->napi, + fec_enet_poll, fpi->napi_weight); + dev->ethtool_ops = &fec_ethtool_ops; dev->do_ioctl = fec_ioctl; diff -puN drivers/net/forcedeth.c~git-net drivers/net/forcedeth.c --- a/drivers/net/forcedeth.c~git-net +++ a/drivers/net/forcedeth.c @@ -159,6 +159,8 @@ #define dprintk(x...) 
do { } while (0) #endif +#define TX_WORK_PER_LOOP 64 +#define RX_WORK_PER_LOOP 64 /* * Hardware access: @@ -745,6 +747,9 @@ struct nv_skb_map { struct fe_priv { spinlock_t lock; + struct net_device *dev; + struct napi_struct napi; + /* General data: * Locking: spin_lock(&np->lock); */ struct net_device_stats stats; @@ -1586,9 +1591,10 @@ static int nv_alloc_rx_optimized(struct static void nv_do_rx_refill(unsigned long data) { struct net_device *dev = (struct net_device *) data; + struct fe_priv *np = netdev_priv(dev); /* Just reschedule NAPI rx processing */ - netif_rx_schedule(dev); + netif_rx_schedule(dev, &np->napi); } #else static void nv_do_rx_refill(unsigned long data) @@ -2997,7 +3003,7 @@ static irqreturn_t nv_nic_irq(int foo, v #ifdef CONFIG_FORCEDETH_NAPI if (events & NVREG_IRQ_RX_ALL) { - netif_rx_schedule(dev); + netif_rx_schedule(dev, &np->napi); /* Disable furthur receive irq's */ spin_lock(&np->lock); @@ -3010,7 +3016,7 @@ static irqreturn_t nv_nic_irq(int foo, v spin_unlock(&np->lock); } #else - if (nv_rx_process(dev, dev->weight)) { + if (nv_rx_process(dev, RX_WORK_PER_LOOP)) { if (unlikely(nv_alloc_rx(dev))) { spin_lock(&np->lock); if (!np->in_shutdown) @@ -3079,8 +3085,6 @@ static irqreturn_t nv_nic_irq(int foo, v return IRQ_RETVAL(i); } -#define TX_WORK_PER_LOOP 64 -#define RX_WORK_PER_LOOP 64 /** * All _optimized functions are used to help increase performance * (reduce CPU and increase throughput). 
They use descripter version 3, @@ -3114,7 +3118,7 @@ static irqreturn_t nv_nic_irq_optimized( #ifdef CONFIG_FORCEDETH_NAPI if (events & NVREG_IRQ_RX_ALL) { - netif_rx_schedule(dev); + netif_rx_schedule(dev, &np->napi); /* Disable furthur receive irq's */ spin_lock(&np->lock); @@ -3127,7 +3131,7 @@ static irqreturn_t nv_nic_irq_optimized( spin_unlock(&np->lock); } #else - if (nv_rx_process_optimized(dev, dev->weight)) { + if (nv_rx_process_optimized(dev, RX_WORK_PER_LOOP)) { if (unlikely(nv_alloc_rx_optimized(dev))) { spin_lock(&np->lock); if (!np->in_shutdown) @@ -3245,19 +3249,19 @@ static irqreturn_t nv_nic_irq_tx(int foo } #ifdef CONFIG_FORCEDETH_NAPI -static int nv_napi_poll(struct net_device *dev, int *budget) +static int nv_napi_poll(struct napi_struct *napi, int budget) { - int pkts, limit = min(*budget, dev->quota); - struct fe_priv *np = netdev_priv(dev); + struct fe_priv *np = container_of(napi, struct fe_priv, napi); + struct net_device *dev = np->dev; u8 __iomem *base = get_hwbase(dev); unsigned long flags; - int retcode; + int pkts, retcode; if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) { - pkts = nv_rx_process(dev, limit); + pkts = nv_rx_process(dev, budget); retcode = nv_alloc_rx(dev); } else { - pkts = nv_rx_process_optimized(dev, limit); + pkts = nv_rx_process_optimized(dev, budget); retcode = nv_alloc_rx_optimized(dev); } @@ -3268,13 +3272,12 @@ static int nv_napi_poll(struct net_devic spin_unlock_irqrestore(&np->lock, flags); } - if (pkts < limit) { - /* all done, no more packets present */ - netif_rx_complete(dev); - + if (pkts < budget) { /* re-enable receive interrupts */ spin_lock_irqsave(&np->lock, flags); + __netif_rx_complete(dev, napi); + np->irqmask |= NVREG_IRQ_RX_ALL; if (np->msi_flags & NV_MSI_X_ENABLED) writel(NVREG_IRQ_RX_ALL, base + NvRegIrqMask); @@ -3282,13 +3285,8 @@ static int nv_napi_poll(struct net_devic writel(np->irqmask, base + NvRegIrqMask); spin_unlock_irqrestore(&np->lock, flags); - return 0; - } else { 
- /* used up our quantum, so reschedule */ - dev->quota -= pkts; - *budget -= pkts; - return 1; } + return pkts; } #endif @@ -3296,6 +3294,7 @@ static int nv_napi_poll(struct net_devic static irqreturn_t nv_nic_irq_rx(int foo, void *data) { struct net_device *dev = (struct net_device *) data; + struct fe_priv *np = netdev_priv(dev); u8 __iomem *base = get_hwbase(dev); u32 events; @@ -3303,7 +3302,7 @@ static irqreturn_t nv_nic_irq_rx(int foo writel(NVREG_IRQ_RX_ALL, base + NvRegMSIXIrqStatus); if (events) { - netif_rx_schedule(dev); + netif_rx_schedule(dev, &np->napi); /* disable receive interrupts on the nic */ writel(NVREG_IRQ_RX_ALL, base + NvRegIrqMask); pci_push(base); @@ -3329,7 +3328,7 @@ static irqreturn_t nv_nic_irq_rx(int foo if (!(events & np->irqmask)) break; - if (nv_rx_process_optimized(dev, dev->weight)) { + if (nv_rx_process_optimized(dev, RX_WORK_PER_LOOP)) { if (unlikely(nv_alloc_rx_optimized(dev))) { spin_lock_irqsave(&np->lock, flags); if (!np->in_shutdown) @@ -4620,7 +4619,9 @@ static void nv_self_test(struct net_devi if (test->flags & ETH_TEST_FL_OFFLINE) { if (netif_running(dev)) { netif_stop_queue(dev); - netif_poll_disable(dev); +#ifdef CONFIG_FORCEDETH_NAPI + napi_disable(&np->napi); +#endif netif_tx_lock_bh(dev); spin_lock_irq(&np->lock); nv_disable_hw_interrupts(dev, np->irqmask); @@ -4679,7 +4680,9 @@ static void nv_self_test(struct net_devi nv_start_rx(dev); nv_start_tx(dev); netif_start_queue(dev); - netif_poll_enable(dev); +#ifdef CONFIG_FORCEDETH_NAPI + napi_enable(&np->napi); +#endif nv_enable_hw_interrupts(dev, np->irqmask); } } @@ -4911,7 +4914,9 @@ static int nv_open(struct net_device *de nv_start_rx(dev); nv_start_tx(dev); netif_start_queue(dev); - netif_poll_enable(dev); +#ifdef CONFIG_FORCEDETH_NAPI + napi_enable(&np->napi); +#endif if (ret) { netif_carrier_on(dev); @@ -4942,7 +4947,9 @@ static int nv_close(struct net_device *d spin_lock_irq(&np->lock); np->in_shutdown = 1; spin_unlock_irq(&np->lock); - 
netif_poll_disable(dev); +#ifdef CONFIG_FORCEDETH_NAPI + napi_disable(&np->napi); +#endif synchronize_irq(dev->irq); del_timer_sync(&np->oom_kick); @@ -4994,6 +5001,7 @@ static int __devinit nv_probe(struct pci goto out; np = netdev_priv(dev); + np->dev = dev; np->pci_dev = pci_dev; spin_lock_init(&np->lock); SET_MODULE_OWNER(dev); @@ -5155,9 +5163,8 @@ static int __devinit nv_probe(struct pci #ifdef CONFIG_NET_POLL_CONTROLLER dev->poll_controller = nv_poll_controller; #endif - dev->weight = RX_WORK_PER_LOOP; #ifdef CONFIG_FORCEDETH_NAPI - dev->poll = nv_napi_poll; + netif_napi_add(dev, &np->napi, nv_napi_poll, RX_WORK_PER_LOOP); #endif SET_ETHTOOL_OPS(dev, &ops); dev->tx_timeout = nv_tx_timeout; diff -puN drivers/net/fs_enet/fs_enet-main.c~git-net drivers/net/fs_enet/fs_enet-main.c --- a/drivers/net/fs_enet/fs_enet-main.c~git-net +++ a/drivers/net/fs_enet/fs_enet-main.c @@ -68,18 +68,16 @@ static void fs_set_multicast_list(struct } /* NAPI receive function */ -static int fs_enet_rx_napi(struct net_device *dev, int *budget) +static int fs_enet_rx_napi(struct napi_struct *napi, int budget) { - struct fs_enet_private *fep = netdev_priv(dev); + struct fs_enet_private *fep = container_of(napi, struct fs_enet_private, napi); + struct net_device *dev = fep->dev; const struct fs_platform_info *fpi = fep->fpi; cbd_t *bdp; struct sk_buff *skb, *skbn, *skbt; int received = 0; u16 pkt_len, sc; int curidx; - int rx_work_limit = 0; /* pacify gcc */ - - rx_work_limit = min(dev->quota, *budget); if (!netif_running(dev)) return 0; @@ -94,7 +92,6 @@ static int fs_enet_rx_napi(struct net_de (*fep->ops->napi_clear_rx_event)(dev); while (((sc = CBDR_SC(bdp)) & BD_ENET_RX_EMPTY) == 0) { - curidx = bdp - fep->rx_bd_base; /* @@ -134,11 +131,6 @@ static int fs_enet_rx_napi(struct net_de skbn = skb; } else { - - /* napi, got packet but no quota */ - if (--rx_work_limit < 0) - break; - skb = fep->rx_skbuff[curidx]; dma_unmap_single(fep->dev, CBDR_BUFADDR(bdp), @@ -197,22 +189,19 @@ static 
int fs_enet_rx_napi(struct net_de bdp = fep->rx_bd_base; (*fep->ops->rx_bd_done)(dev); + + if (received >= budget) + break; } fep->cur_rx = bdp; - dev->quota -= received; - *budget -= received; - - if (rx_work_limit < 0) - return 1; /* not done */ - - /* done */ - netif_rx_complete(dev); - - (*fep->ops->napi_enable_rx)(dev); - - return 0; + if (received < budget) { + /* done */ + netif_rx_complete(dev, napi); + (*fep->ops->napi_enable_rx)(dev); + } + return received; } /* non NAPI receive function */ @@ -468,7 +457,7 @@ fs_enet_interrupt(int irq, void *dev_id) if (!fpi->use_napi) fs_enet_rx_non_napi(dev); else { - napi_ok = netif_rx_schedule_prep(dev); + napi_ok = napi_schedule_prep(&fep->napi); (*fep->ops->napi_disable_rx)(dev); (*fep->ops->clear_int_events)(dev, fep->ev_napi_rx); @@ -476,7 +465,7 @@ fs_enet_interrupt(int irq, void *dev_id) /* NOTE: it is possible for FCCs in NAPI mode */ /* to submit a spurious interrupt while in poll */ if (napi_ok) - __netif_rx_schedule(dev); + __netif_rx_schedule(dev, &fep->napi); } } @@ -797,18 +786,22 @@ static int fs_enet_open(struct net_devic int r; int err; + napi_enable(&fep->napi); + /* Install our interrupt handler. 
*/ r = fs_request_irq(dev, fep->interrupt, "fs_enet-mac", fs_enet_interrupt); if (r != 0) { printk(KERN_ERR DRV_MODULE_NAME ": %s Could not allocate FS_ENET IRQ!", dev->name); + napi_disable(&fep->napi); return -EINVAL; } err = fs_init_phy(dev); - if(err) + if(err) { + napi_disable(&fep->napi); return err; - + } phy_start(fep->phydev); return 0; @@ -821,6 +814,7 @@ static int fs_enet_close(struct net_devi netif_stop_queue(dev); netif_carrier_off(dev); + napi_disable(&fep->napi); phy_stop(fep->phydev); spin_lock_irqsave(&fep->lock, flags); @@ -1044,10 +1038,9 @@ static struct net_device *fs_init_instan ndev->stop = fs_enet_close; ndev->get_stats = fs_enet_get_stats; ndev->set_multicast_list = fs_set_multicast_list; - if (fpi->use_napi) { - ndev->poll = fs_enet_rx_napi; - ndev->weight = fpi->napi_weight; - } + netif_napi_add(ndev, &fep->napi, + fs_enet_rx_napi, fpi->napi_weight); + ndev->ethtool_ops = &fs_ethtool_ops; ndev->do_ioctl = fs_ioctl; diff -puN drivers/net/fs_enet/fs_enet.h~git-net drivers/net/fs_enet/fs_enet.h --- a/drivers/net/fs_enet/fs_enet.h~git-net +++ a/drivers/net/fs_enet/fs_enet.h @@ -121,6 +121,7 @@ struct fs_enet_mii_bus { }; struct fs_enet_private { + struct napi_struct napi; struct device *dev; /* pointer back to the device (must be initialized first) */ spinlock_t lock; /* during all ops except TX pckt processing */ spinlock_t tx_lock; /* during fs_start_xmit and fs_tx */ diff -puN drivers/net/gianfar.c~git-net drivers/net/gianfar.c --- a/drivers/net/gianfar.c~git-net +++ a/drivers/net/gianfar.c @@ -134,7 +134,7 @@ static void gfar_configure_serdes(struct extern int gfar_local_mdio_write(struct gfar_mii *regs, int mii_id, int regnum, u16 value); extern int gfar_local_mdio_read(struct gfar_mii *regs, int mii_id, int regnum); #ifdef CONFIG_GFAR_NAPI -static int gfar_poll(struct net_device *dev, int *budget); +static int gfar_poll(struct napi_struct *napi, int budget); #endif #ifdef CONFIG_NET_POLL_CONTROLLER static void gfar_netpoll(struct 
net_device *dev); @@ -188,6 +188,7 @@ static int gfar_probe(struct platform_de return -ENOMEM; priv = netdev_priv(dev); + priv->dev = dev; /* Set the info in the priv to the current info */ priv->einfo = einfo; @@ -261,10 +262,7 @@ static int gfar_probe(struct platform_de dev->hard_start_xmit = gfar_start_xmit; dev->tx_timeout = gfar_timeout; dev->watchdog_timeo = TX_TIMEOUT; -#ifdef CONFIG_GFAR_NAPI - dev->poll = gfar_poll; - dev->weight = GFAR_DEV_WEIGHT; -#endif + netif_napi_add(dev, &priv->napi, gfar_poll, GFAR_DEV_WEIGHT); #ifdef CONFIG_NET_POLL_CONTROLLER dev->poll_controller = gfar_netpoll; #endif @@ -939,6 +937,8 @@ static int gfar_enet_open(struct net_dev { int err; + napi_enable(&priv->napi); + /* Initialize a bunch of registers */ init_registers(dev); @@ -946,10 +946,14 @@ static int gfar_enet_open(struct net_dev err = init_phy(dev); - if(err) + if(err) { + napi_disable(&priv->napi); return err; + } err = startup_gfar(dev); + if (err) + napi_disable(&priv->napi); netif_start_queue(dev); @@ -1102,6 +1106,9 @@ static int gfar_start_xmit(struct sk_buf static int gfar_close(struct net_device *dev) { struct gfar_private *priv = netdev_priv(dev); + + napi_disable(&priv->napi); + stop_gfar(dev); /* Disconnect from the PHY */ @@ -1318,7 +1325,7 @@ struct sk_buff * gfar_new_skb(struct net return NULL; alignamount = RXBUF_ALIGNMENT - - (((unsigned) skb->data) & (RXBUF_ALIGNMENT - 1)); + (((unsigned long) skb->data) & (RXBUF_ALIGNMENT - 1)); /* We need the data buffer to be aligned properly. 
We will reserve * as many bytes as needed to align the data properly @@ -1390,12 +1397,12 @@ irqreturn_t gfar_receive(int irq, void * /* support NAPI */ #ifdef CONFIG_GFAR_NAPI - if (netif_rx_schedule_prep(dev)) { + if (netif_rx_schedule_prep(dev, &priv->napi)) { tempval = gfar_read(&priv->regs->imask); tempval &= IMASK_RX_DISABLED; gfar_write(&priv->regs->imask, tempval); - __netif_rx_schedule(dev); + __netif_rx_schedule(dev, &priv->napi); } else { if (netif_msg_rx_err(priv)) printk(KERN_DEBUG "%s: receive called twice (%x)[%x]\n", @@ -1569,23 +1576,16 @@ int gfar_clean_rx_ring(struct net_device } #ifdef CONFIG_GFAR_NAPI -static int gfar_poll(struct net_device *dev, int *budget) +static int gfar_poll(struct napi_struct *napi, int budget) { + struct gfar_private *priv = container_of(napi, struct gfar_private, napi); + struct net_device *dev = priv->dev; int howmany; - struct gfar_private *priv = netdev_priv(dev); - int rx_work_limit = *budget; - - if (rx_work_limit > dev->quota) - rx_work_limit = dev->quota; - howmany = gfar_clean_rx_ring(dev, rx_work_limit); + howmany = gfar_clean_rx_ring(dev, budget); - dev->quota -= howmany; - rx_work_limit -= howmany; - *budget -= howmany; - - if (rx_work_limit > 0) { - netif_rx_complete(dev); + if (howmany < budget) { + netif_rx_complete(dev, napi); /* Clear the halt bit in RSTAT */ gfar_write(&priv->regs->rstat, RSTAT_CLEAR_RHALT); @@ -1601,8 +1601,7 @@ static int gfar_poll(struct net_device * gfar_write(&priv->regs->rxic, 0); } - /* Return 1 if there's more work to do */ - return (rx_work_limit > 0) ? 
0 : 1; + return howmany; } #endif diff -puN drivers/net/gianfar.h~git-net drivers/net/gianfar.h --- a/drivers/net/gianfar.h~git-net +++ a/drivers/net/gianfar.h @@ -690,6 +690,9 @@ struct gfar_private { /* RX Locked fields */ spinlock_t rxlock; + struct net_device *dev; + struct napi_struct napi; + /* skb array and index */ struct sk_buff ** rx_skbuff; u16 skb_currx; diff -puN drivers/net/ibmveth.c~git-net drivers/net/ibmveth.c --- a/drivers/net/ibmveth.c~git-net +++ a/drivers/net/ibmveth.c @@ -83,7 +83,7 @@ static int ibmveth_open(struct net_device *dev); static int ibmveth_close(struct net_device *dev); static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); -static int ibmveth_poll(struct net_device *dev, int *budget); +static int ibmveth_poll(struct napi_struct *napi, int budget); static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *dev); static struct net_device_stats *ibmveth_get_stats(struct net_device *dev); static void ibmveth_set_multicast_list(struct net_device *dev); @@ -480,6 +480,8 @@ static int ibmveth_open(struct net_devic ibmveth_debug_printk("open starting\n"); + napi_enable(&adapter->napi); + for(i = 0; irx_buff_pool[i].size; @@ -489,6 +491,7 @@ static int ibmveth_open(struct net_devic if(!adapter->buffer_list_addr || !adapter->filter_list_addr) { ibmveth_error_printk("unable to allocate filter or buffer list pages\n"); ibmveth_cleanup(adapter); + napi_disable(&adapter->napi); return -ENOMEM; } @@ -498,6 +501,7 @@ static int ibmveth_open(struct net_devic if(!adapter->rx_queue.queue_addr) { ibmveth_error_printk("unable to allocate rx queue pages\n"); ibmveth_cleanup(adapter); + napi_disable(&adapter->napi); return -ENOMEM; } @@ -514,6 +518,7 @@ static int ibmveth_open(struct net_devic (dma_mapping_error(adapter->rx_queue.queue_dma))) { ibmveth_error_printk("unable to map filter or buffer list pages\n"); ibmveth_cleanup(adapter); + napi_disable(&adapter->napi); return -ENOMEM; } @@ -545,6 +550,7 @@ static int 
ibmveth_open(struct net_devic rxq_desc.desc, mac_address); ibmveth_cleanup(adapter); + napi_disable(&adapter->napi); return -ENONET; } @@ -555,6 +561,7 @@ static int ibmveth_open(struct net_devic ibmveth_error_printk("unable to alloc pool\n"); adapter->rx_buff_pool[i].active = 0; ibmveth_cleanup(adapter); + napi_disable(&adapter->napi); return -ENOMEM ; } } @@ -567,6 +574,7 @@ static int ibmveth_open(struct net_devic } while (H_IS_LONG_BUSY(rc) || (rc == H_BUSY)); ibmveth_cleanup(adapter); + napi_disable(&adapter->napi); return rc; } @@ -587,6 +595,8 @@ static int ibmveth_close(struct net_devi ibmveth_debug_printk("close starting\n"); + napi_disable(&adapter->napi); + if (!adapter->pool_config) netif_stop_queue(netdev); @@ -767,80 +777,68 @@ out: spin_lock_irqsave(&adapter->stats_l return 0; } -static int ibmveth_poll(struct net_device *netdev, int *budget) +static int ibmveth_poll(struct napi_struct *napi, int budget) { - struct ibmveth_adapter *adapter = netdev->priv; - int max_frames_to_process = netdev->quota; + struct ibmveth_adapter *adapter = container_of(napi, struct ibmveth_adapter, napi); + struct net_device *netdev = adapter->netdev; int frames_processed = 0; - int more_work = 1; unsigned long lpar_rc; restart_poll: do { - struct net_device *netdev = adapter->netdev; - - if(ibmveth_rxq_pending_buffer(adapter)) { - struct sk_buff *skb; + struct sk_buff *skb; - rmb(); + if (!ibmveth_rxq_pending_buffer(adapter)) + break; - if(!ibmveth_rxq_buffer_valid(adapter)) { - wmb(); /* suggested by larson1 */ - adapter->rx_invalid_buffer++; - ibmveth_debug_printk("recycling invalid buffer\n"); - ibmveth_rxq_recycle_buffer(adapter); - } else { - int length = ibmveth_rxq_frame_length(adapter); - int offset = ibmveth_rxq_frame_offset(adapter); - skb = ibmveth_rxq_get_buffer(adapter); - - ibmveth_rxq_harvest_buffer(adapter); - - skb_reserve(skb, offset); - skb_put(skb, length); - skb->protocol = eth_type_trans(skb, netdev); - - netif_receive_skb(skb); /* send it up */ - - 
adapter->stats.rx_packets++; - adapter->stats.rx_bytes += length; - frames_processed++; - netdev->last_rx = jiffies; - } + rmb(); + if (!ibmveth_rxq_buffer_valid(adapter)) { + wmb(); /* suggested by larson1 */ + adapter->rx_invalid_buffer++; + ibmveth_debug_printk("recycling invalid buffer\n"); + ibmveth_rxq_recycle_buffer(adapter); } else { - more_work = 0; + int length = ibmveth_rxq_frame_length(adapter); + int offset = ibmveth_rxq_frame_offset(adapter); + skb = ibmveth_rxq_get_buffer(adapter); + + ibmveth_rxq_harvest_buffer(adapter); + + skb_reserve(skb, offset); + skb_put(skb, length); + skb->protocol = eth_type_trans(skb, netdev); + + netif_receive_skb(skb); /* send it up */ + + adapter->stats.rx_packets++; + adapter->stats.rx_bytes += length; + frames_processed++; + netdev->last_rx = jiffies; } - } while(more_work && (frames_processed < max_frames_to_process)); + } while (frames_processed < budget); ibmveth_replenish_task(adapter); - if(more_work) { - /* more work to do - return that we are not done yet */ - netdev->quota -= frames_processed; - *budget -= frames_processed; - return 1; - } - - /* we think we are done - reenable interrupts, then check once more to make sure we are done */ - lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_ENABLE); + if (frames_processed < budget) { + /* We think we are done - reenable interrupts, + * then check once more to make sure we are done. 
+ */ + lpar_rc = h_vio_signal(adapter->vdev->unit_address, + VIO_IRQ_ENABLE); - ibmveth_assert(lpar_rc == H_SUCCESS); + ibmveth_assert(lpar_rc == H_SUCCESS); - netif_rx_complete(netdev); + netif_rx_complete(netdev, napi); - if(ibmveth_rxq_pending_buffer(adapter) && netif_rx_reschedule(netdev, frames_processed)) - { - lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE); - ibmveth_assert(lpar_rc == H_SUCCESS); - more_work = 1; - goto restart_poll; + if (ibmveth_rxq_pending_buffer(adapter) && + netif_rx_reschedule(napi)) { + lpar_rc = h_vio_signal(adapter->vdev->unit_address, + VIO_IRQ_DISABLE); + goto restart_poll; + } } - netdev->quota -= frames_processed; - *budget -= frames_processed; - - /* we really are done */ - return 0; + return frames_processed; } static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance) @@ -849,10 +847,11 @@ static irqreturn_t ibmveth_interrupt(int struct ibmveth_adapter *adapter = netdev->priv; unsigned long lpar_rc; - if(netif_rx_schedule_prep(netdev)) { - lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE); + if (netif_rx_schedule_prep(netdev, &adapter->napi)) { + lpar_rc = h_vio_signal(adapter->vdev->unit_address, + VIO_IRQ_DISABLE); ibmveth_assert(lpar_rc == H_SUCCESS); - __netif_rx_schedule(netdev); + __netif_rx_schedule(netdev, &adapter->napi); } return IRQ_HANDLED; } @@ -1004,6 +1003,8 @@ static int __devinit ibmveth_probe(struc adapter->mcastFilterSize= *mcastFilterSize_p; adapter->pool_config = 0; + netif_napi_add(netdev, &adapter->napi, ibmveth_poll, 16); + /* Some older boxes running PHYP non-natively have an OF that returns a 8-byte local-mac-address field (and the first 2 bytes have to be ignored) while newer boxes' OF return @@ -1020,8 +1021,6 @@ static int __devinit ibmveth_probe(struc netdev->irq = dev->irq; netdev->open = ibmveth_open; - netdev->poll = ibmveth_poll; - netdev->weight = 16; netdev->stop = ibmveth_close; netdev->hard_start_xmit = ibmveth_start_xmit; netdev->get_stats 
= ibmveth_get_stats; diff -puN drivers/net/ibmveth.h~git-net drivers/net/ibmveth.h --- a/drivers/net/ibmveth.h~git-net +++ a/drivers/net/ibmveth.h @@ -112,6 +112,7 @@ struct ibmveth_rx_q { struct ibmveth_adapter { struct vio_dev *vdev; struct net_device *netdev; + struct napi_struct napi; struct net_device_stats stats; unsigned int mcastFilterSize; unsigned long mac_addr; diff -puN drivers/net/irda/Kconfig~git-net drivers/net/irda/Kconfig --- a/drivers/net/irda/Kconfig~git-net +++ a/drivers/net/irda/Kconfig @@ -162,7 +162,33 @@ config EP7211_DONGLE Say Y here if you want to build support for the Cirrus logic EP7211 chipset's infrared module. +config KSDAZZLE_DONGLE + tristate "KingSun Dazzle IrDA-USB dongle (EXPERIMENTAL)" + depends on IRDA && USB && EXPERIMENTAL + help + Say Y or M here if you want to build support for the KingSun Dazzle + IrDA-USB bridge device driver. + + This USB bridge does not conform to the IrDA-USB device class + specification, and therefore needs its own specific driver. This + dongle supports SIR speeds only (9600 through 115200 bps). + + To compile it as a module, choose M here: the module will be called + ksdazzle-sir. +config KS959_DONGLE + tristate "KingSun KS-959 IrDA-USB dongle (EXPERIMENTAL)" + depends on IRDA && USB && EXPERIMENTAL + help + Say Y or M here if you want to build support for the KingSun KS-959 + IrDA-USB bridge device driver. + + This USB bridge does not conform to the IrDA-USB device class + specification, and therefore needs its own specific driver. This + dongle supports SIR speeds only (9600 through 57600 bps). + + To compile it as a module, choose M here: the module will be called + ks959-sir. comment "Old SIR device drivers" diff -puN drivers/net/irda/Makefile~git-net drivers/net/irda/Makefile --- a/drivers/net/irda/Makefile~git-net +++ a/drivers/net/irda/Makefile @@ -47,6 +47,8 @@ obj-$(CONFIG_MA600_DONGLE) += ma600-sir. 
obj-$(CONFIG_TOIM3232_DONGLE) += toim3232-sir.o obj-$(CONFIG_EP7211_DONGLE) += ep7211-sir.o obj-$(CONFIG_KINGSUN_DONGLE) += kingsun-sir.o +obj-$(CONFIG_KSDAZZLE_DONGLE) += ksdazzle-sir.o +obj-$(CONFIG_KS959_DONGLE) += ks959-sir.o # The SIR helper module sir-dev-objs := sir_dev.o sir_dongle.o diff -puN /dev/null drivers/net/irda/ks959-sir.c --- /dev/null +++ a/drivers/net/irda/ks959-sir.c @@ -0,0 +1,939 @@ +/***************************************************************************** +* +* Filename: ks959-sir.c +* Version: 0.1.2 +* Description: Irda KingSun KS-959 USB Dongle +* Status: Experimental +* Author: Alex Villacís Lasso +* with help from Domen Puncer +* +* Based on stir4200, mcs7780, kingsun-sir drivers. +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +* +*****************************************************************************/ + +/* + * Following is my most current (2007-07-17) understanding of how the Kingsun + * KS-959 dongle is supposed to work. This information was deduced by + * reverse-engineering and examining the USB traffic captured with USBSnoopy + * from the WinXP driver. Feel free to update here as more of the dongle is + * known. + * + * My most sincere thanks must go to Domen Puncer for + * invaluable help in cracking the obfuscation and padding required for this + * dongle. 
+ * + * General: This dongle exposes one interface with one interrupt IN endpoint. + * However, the interrupt endpoint is NOT used at all for this dongle. Instead, + * this dongle uses control transfers for everything, including sending and + * receiving the IrDA frame data. Apparently the interrupt endpoint is just a + * dummy to ensure the dongle has a valid interface to present to the PC. And I + * thought the DonShine dongle was weird... In addition, this dongle uses + * obfuscation (?!?!), applied at the USB level, to hide the traffic, both sent + * and received, from the dongle. I call it obfuscation because the XOR keying + * and padding required to produce USB traffic acceptable for the dongle can + * not be explained by any other technical requirement. + * + * Transmission: To transmit an IrDA frame, the driver must prepare a control + * URB with the following as a setup packet: + * bRequestType USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE + * bRequest 0x09 + * wValue (length of the cleartext fragment, before padding) + * wIndex 0x0000 + * wLength (length of the padded and obfuscated payload) + * The payload packet must be manually wrapped and escaped (as in stir4200.c), + * then padded and obfuscated before being sent. Both padding and obfuscation + * are implemented in the procedure obfuscate_tx_buffer(). Suffice to say, the + * designer/programmer of the dongle used his name as a source for the + * obfuscation. WTF?! + * Apparently the dongle cannot handle payloads larger than 256 bytes. The + * driver has to perform fragmentation in order to send anything larger than + * this limit. + * + * Reception: To receive data, the driver must poll the dongle regularly (like + * kingsun-sir.c) with control URBs and the following as a setup packet: + * bRequestType USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE + * bRequest 0x01 + * wValue 0x0200 + * wIndex 0x0000 + * wLength 0x0800 (size of available buffer) + * If there is data to be read, it will be returned as the response payload.
+ * This data is (apparently) not padded, but it is obfuscated. To de-obfuscate + * it, the driver must XOR every byte, in sequence, with a value that starts at + * 1 and is incremented with each byte processed, and then with 0x55. The value + * incremented with each byte processed overflows as an unsigned char. The + * resulting bytes form a wrapped SIR frame that is unwrapped and unescaped + * as in stir4200.c. The incremented value is NOT reset with each frame, but is + * kept across the entire session with the dongle. Also, the dongle inserts an + * extra garbage byte with value 0x95 (after decoding) every 0xff bytes, which + * must be skipped. + * + * Speed change: To change the speed of the dongle, the driver prepares a + * control URB with the following as a setup packet: + * bRequestType USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE + * bRequest 0x09 + * wValue 0x0200 + * wIndex 0x0001 + * wLength 0x0008 (length of the payload) + * The payload is an 8-byte record, apparently identical to the one used in + * drivers/usb/serial/cypress_m8.c to change speed: + * __u32 baudSpeed; + * unsigned int dataBits : 2; // 0 - 5 bits 3 - 8 bits + * unsigned int : 1; + * unsigned int stopBits : 1; + * unsigned int parityEnable : 1; + * unsigned int parityType : 1; + * unsigned int : 1; + * unsigned int reset : 1; + * unsigned char reserved[3]; // set to 0 + * + * For now only SIR speeds have been observed with this dongle. Therefore, + * nothing is known on what changes (if any) must be done to frame wrapping / + * unwrapping for higher than SIR speeds. This driver assumes no change is + * necessary and announces support for all the way to 57600 bps. Although the + * package announces support for up to 4 Mbps, tests with a Sony Ericsson K300 + * phone show corruption when receiving large frames at 115200 bps, the highest + * speed announced by the phone. However, transmission at 115200 bps is OK. Go + * figure.
Since I don't know whether the phone or the dongle is at fault, max + * announced speed is 57600 bps until someone produces a device that can run + * at higher speeds with this dongle. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#define KS959_VENDOR_ID 0x07d0 +#define KS959_PRODUCT_ID 0x4959 + +/* These are the currently known USB ids */ +static struct usb_device_id dongles[] = { + /* KingSun Co,Ltd IrDA/USB Bridge */ + {USB_DEVICE(KS959_VENDOR_ID, KS959_PRODUCT_ID)}, + {} +}; + +MODULE_DEVICE_TABLE(usb, dongles); + +#define KINGSUN_MTT 0x07 +#define KINGSUN_REQ_RECV 0x01 +#define KINGSUN_REQ_SEND 0x09 + +#define KINGSUN_RCV_FIFO_SIZE 2048 /* Max length we can receive */ +#define KINGSUN_SND_FIFO_SIZE 2048 /* Max packet we can send */ +#define KINGSUN_SND_PACKET_SIZE 256 /* Max packet dongle can handle */ + +struct ks959_speedparams { + __le32 baudrate; /* baud rate, little endian */ + __u8 flags; + __u8 reserved[3]; +} __attribute__ ((packed)); + +#define KS_DATA_5_BITS 0x00 +#define KS_DATA_6_BITS 0x01 +#define KS_DATA_7_BITS 0x02 +#define KS_DATA_8_BITS 0x03 + +#define KS_STOP_BITS_1 0x00 +#define KS_STOP_BITS_2 0x08 + +#define KS_PAR_DISABLE 0x00 +#define KS_PAR_EVEN 0x10 +#define KS_PAR_ODD 0x30 +#define KS_RESET 0x80 + +struct ks959_cb { + struct usb_device *usbdev; /* init: probe_irda */ + struct net_device *netdev; /* network layer */ + struct irlap_cb *irlap; /* The link layer we are binded to */ + struct net_device_stats stats; /* network statistics */ + struct qos_info qos; + + struct usb_ctrlrequest *tx_setuprequest; + struct urb *tx_urb; + __u8 *tx_buf_clear; + unsigned int tx_buf_clear_used; + unsigned int tx_buf_clear_sent; + __u8 *tx_buf_xored; + + struct usb_ctrlrequest *rx_setuprequest; + struct urb *rx_urb; + __u8 *rx_buf; + __u8 rx_variable_xormask; + iobuff_t rx_unwrap_buff; + struct timeval 
rx_time; + + struct usb_ctrlrequest *speed_setuprequest; + struct urb *speed_urb; + struct ks959_speedparams speedparams; + unsigned int new_speed; + + spinlock_t lock; + int receiving; +}; + +/* Procedure to perform the obfuscation/padding expected by the dongle + * + * buf_cleartext (IN) Cleartext version of the IrDA frame to transmit + * len_cleartext (IN) Length of the cleartext version of IrDA frame + * buf_xoredtext (OUT) Obfuscated version of frame built by proc + * len_maxbuf (IN) Maximum space available at buf_xoredtext + * + * (return) length of obfuscated frame with padding + * + * If not enough space (as indicated by len_maxbuf vs. required padding), + * zero is returned + * + * The value of lookup_string is actually a required portion of the algorithm. + * Seems the designer of the dongle wanted to state who exactly is responsible + * for implementing obfuscation. Send your best (or other) wishes to him ]:-) + */ +static unsigned int obfuscate_tx_buffer(const __u8 * buf_cleartext, + unsigned int len_cleartext, + __u8 * buf_xoredtext, + unsigned int len_maxbuf) +{ + unsigned int len_xoredtext; + + /* Calculate required length with padding, check for necessary space */ + len_xoredtext = ((len_cleartext + 7) & ~0x7) + 0x10; + if (len_xoredtext <= len_maxbuf) { + static const __u8 lookup_string[] = "wangshuofei19710"; + __u8 xor_mask; + + /* Unlike the WinXP driver, we *do* clear out the padding */ + memset(buf_xoredtext, 0, len_xoredtext); + + xor_mask = lookup_string[(len_cleartext & 0x0f) ^ 0x06] ^ 0x55; + + while (len_cleartext-- > 0) { + *buf_xoredtext++ = *buf_cleartext++ ^ xor_mask; + } + } else { + len_xoredtext = 0; + } + return len_xoredtext; +} + +/* Completion callback for the speed-change URB */ +static void ks959_speed_irq(struct urb *urb) +{ + /* unlink, shutdown, unplug, other nasties */ + if (urb->status != 0) { + err("ks959_speed_irq: urb asynchronously failed - %d", + urb->status); + } +} + +/* Send a control request to change speed of the dongle */
+static int ks959_change_speed(struct ks959_cb *kingsun, unsigned speed) +{ + static unsigned int supported_speeds[] = { 2400, 9600, 19200, 38400, + 57600, 115200, 576000, 1152000, 4000000, 0 + }; + int err; + unsigned int i; + + if (kingsun->speed_setuprequest == NULL || kingsun->speed_urb == NULL) + return -ENOMEM; + + /* Check that requested speed is among the supported ones */ + for (i = 0; supported_speeds[i] && supported_speeds[i] != speed; i++) ; + if (supported_speeds[i] == 0) + return -EOPNOTSUPP; + + memset(&(kingsun->speedparams), 0, sizeof(struct ks959_speedparams)); + kingsun->speedparams.baudrate = cpu_to_le32(speed); + kingsun->speedparams.flags = KS_DATA_8_BITS; + + /* speed_setuprequest pre-filled in ks959_probe */ + usb_fill_control_urb(kingsun->speed_urb, kingsun->usbdev, + usb_sndctrlpipe(kingsun->usbdev, 0), + (unsigned char *)kingsun->speed_setuprequest, + &(kingsun->speedparams), + sizeof(struct ks959_speedparams), ks959_speed_irq, + kingsun); + kingsun->speed_urb->status = 0; + err = usb_submit_urb(kingsun->speed_urb, GFP_ATOMIC); + + return err; +} + +/* Submit one fragment of an IrDA frame to the dongle */ +static void ks959_send_irq(struct urb *urb); +static int ks959_submit_tx_fragment(struct ks959_cb *kingsun) +{ + unsigned int padlen; + unsigned int wraplen; + int ret; + + /* Check whether current plaintext can produce a padded buffer that fits + within the range handled by the dongle */ + wraplen = (KINGSUN_SND_PACKET_SIZE & ~0x7) - 0x10; + if (wraplen > kingsun->tx_buf_clear_used) + wraplen = kingsun->tx_buf_clear_used; + + /* Perform dongle obfuscation. Also remove the portion of the frame that + was just obfuscated and will now be sent to the dongle. 
*/ + padlen = obfuscate_tx_buffer(kingsun->tx_buf_clear, wraplen, + kingsun->tx_buf_xored, + KINGSUN_SND_PACKET_SIZE); + + /* Calculate how much data can be transmitted in this urb */ + kingsun->tx_setuprequest->wValue = cpu_to_le16(wraplen); + kingsun->tx_setuprequest->wLength = cpu_to_le16(padlen); + /* Rest of the fields were filled in ks959_probe */ + usb_fill_control_urb(kingsun->tx_urb, kingsun->usbdev, + usb_sndctrlpipe(kingsun->usbdev, 0), + (unsigned char *)kingsun->tx_setuprequest, + kingsun->tx_buf_xored, padlen, + ks959_send_irq, kingsun); + kingsun->tx_urb->status = 0; + ret = usb_submit_urb(kingsun->tx_urb, GFP_ATOMIC); + + /* Remember how much data was sent, in order to update at callback */ + kingsun->tx_buf_clear_sent = (ret == 0) ? wraplen : 0; + return ret; +} + +/* Callback transmission routine */ +static void ks959_send_irq(struct urb *urb) +{ + struct ks959_cb *kingsun = urb->context; + struct net_device *netdev = kingsun->netdev; + int ret = 0; + + /* in process of stopping, just drop data */ + if (!netif_running(kingsun->netdev)) { + err("ks959_send_irq: Network not running!"); + return; + } + + /* unlink, shutdown, unplug, other nasties */ + if (urb->status != 0) { + err("ks959_send_irq: urb asynchronously failed - %d", + urb->status); + return; + } + + if (kingsun->tx_buf_clear_used > 0) { + /* Update data remaining to be sent */ + if (kingsun->tx_buf_clear_sent < kingsun->tx_buf_clear_used) { + memmove(kingsun->tx_buf_clear, + kingsun->tx_buf_clear + + kingsun->tx_buf_clear_sent, + kingsun->tx_buf_clear_used - + kingsun->tx_buf_clear_sent); + } + kingsun->tx_buf_clear_used -= kingsun->tx_buf_clear_sent; + kingsun->tx_buf_clear_sent = 0; + + if (kingsun->tx_buf_clear_used > 0) { + /* There is more data to be sent */ + if ((ret = ks959_submit_tx_fragment(kingsun)) != 0) { + err("ks959_send_irq: failed tx_urb submit: %d", + ret); + switch (ret) { + case -ENODEV: + case -EPIPE: + break; + default: + kingsun->stats.tx_errors++; + 
netif_start_queue(netdev); + } + } + } else { + /* All data sent, send next speed && wake network queue */ + if (kingsun->new_speed != -1 && + cpu_to_le32(kingsun->new_speed) != + kingsun->speedparams.baudrate) + ks959_change_speed(kingsun, kingsun->new_speed); + + netif_wake_queue(netdev); + } + } +} + +/* + * net/core transmit hook. Once past the NULL guard, skb is always consumed. + */ +static int ks959_hard_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct ks959_cb *kingsun; + unsigned int wraplen; + int ret = 0; + + if (skb == NULL || netdev == NULL) + return -EINVAL; + + netif_stop_queue(netdev); + + /* the IRDA wrapping routines don't deal with non linear skb */ + SKB_LINEAR_ASSERT(skb); + + kingsun = netdev_priv(netdev); + + spin_lock(&kingsun->lock); + kingsun->new_speed = irda_get_next_speed(skb); + + /* Wrap the frame into tx_buf_clear, overwriting any earlier contents */ + wraplen = + async_wrap_skb(skb, kingsun->tx_buf_clear, KINGSUN_SND_FIFO_SIZE); + kingsun->tx_buf_clear_used = wraplen; + + if ((ret = ks959_submit_tx_fragment(kingsun)) != 0) { + err("ks959_hard_xmit: failed tx_urb submit: %d", ret); + switch (ret) { + case -ENODEV: + case -EPIPE: + break; + default: + kingsun->stats.tx_errors++; + netif_start_queue(netdev); + } + } else { + kingsun->stats.tx_packets++; + kingsun->stats.tx_bytes += skb->len; + + } + + dev_kfree_skb(skb); + spin_unlock(&kingsun->lock); + + return NETDEV_TX_OK; /* skb already freed; never requeue */ +} + +/* Receive callback function */ +static void ks959_rcv_irq(struct urb *urb) +{ + struct ks959_cb *kingsun = urb->context; + int ret; + + /* in process of stopping, just drop data */ + if (!netif_running(kingsun->netdev)) { + kingsun->receiving = 0; + return; + } + + /* unlink, shutdown, unplug, other nasties */ + if (urb->status != 0) { + err("ks959_rcv_irq: urb asynchronously failed - %d", + urb->status); + kingsun->receiving = 0; + return; + } + + if (urb->actual_length > 0) { + __u8 *bytes = urb->transfer_buffer; + unsigned int i; + + for (i = 0; i < urb->actual_length;
i++) { + /* De-obfuscation implemented here: variable portion of + xormask is incremented, and then used with the encoded + byte for the XOR. The result of the operation is used + to unwrap the SIR frame. */ + kingsun->rx_variable_xormask++; + bytes[i] = + bytes[i] ^ kingsun->rx_variable_xormask ^ 0x55u; + + /* rx_variable_xormask doubles as an index counter so we + can skip the byte at 0xff (wrapped around to 0). + */ + if (kingsun->rx_variable_xormask != 0) { + async_unwrap_char(kingsun->netdev, + &kingsun->stats, + &kingsun->rx_unwrap_buff, + bytes[i]); + } + } + kingsun->netdev->last_rx = jiffies; + do_gettimeofday(&kingsun->rx_time); + kingsun->receiving = + (kingsun->rx_unwrap_buff.state != OUTSIDE_FRAME) ? 1 : 0; + } + + /* This urb has already been filled in kingsun_net_open. Setup + packet must be re-filled, but it is assumed that urb keeps the + pointer to the initial setup packet, as well as the payload buffer. + Setup packet is already pre-filled at ks959_probe. + */ + urb->status = 0; + ret = usb_submit_urb(urb, GFP_ATOMIC); +} + +/* + * Function kingsun_net_open (dev) + * + * Network device is taken up. Usually this is done by "ifconfig irda0 up" + */ +static int ks959_net_open(struct net_device *netdev) +{ + struct ks959_cb *kingsun = netdev_priv(netdev); + int err = -ENOMEM; + char hwname[16]; + + /* At this point, urbs are NULL, and skb is NULL (see kingsun_probe) */ + kingsun->receiving = 0; + + /* Initialize for SIR to copy data directly into skb. 
*/ + kingsun->rx_unwrap_buff.in_frame = FALSE; + kingsun->rx_unwrap_buff.state = OUTSIDE_FRAME; + kingsun->rx_unwrap_buff.truesize = IRDA_SKB_MAX_MTU; + kingsun->rx_unwrap_buff.skb = dev_alloc_skb(IRDA_SKB_MAX_MTU); + if (!kingsun->rx_unwrap_buff.skb) + goto free_mem; + + skb_reserve(kingsun->rx_unwrap_buff.skb, 1); + kingsun->rx_unwrap_buff.head = kingsun->rx_unwrap_buff.skb->data; + do_gettimeofday(&kingsun->rx_time); + + kingsun->rx_urb = usb_alloc_urb(0, GFP_KERNEL); + if (!kingsun->rx_urb) + goto free_mem; + + kingsun->tx_urb = usb_alloc_urb(0, GFP_KERNEL); + if (!kingsun->tx_urb) + goto free_mem; + + kingsun->speed_urb = usb_alloc_urb(0, GFP_KERNEL); + if (!kingsun->speed_urb) + goto free_mem; + + /* Initialize speed for dongle */ + kingsun->new_speed = 9600; + err = ks959_change_speed(kingsun, 9600); + if (err < 0) + goto free_mem; + + /* + * Everything is set up: open the IrLAP instance that will drive us. + */ + sprintf(hwname, "usb#%d", kingsun->usbdev->devnum); + kingsun->irlap = irlap_open(netdev, &kingsun->qos, hwname); + if (!kingsun->irlap) { + err = -ENOMEM; /* err is 0 here; open must not report success */ + err("ks959-sir: irlap_open failed"); + goto free_mem; + } + + /* Start reception.
Setup request already pre-filled in ks959_probe */ + usb_fill_control_urb(kingsun->rx_urb, kingsun->usbdev, + usb_rcvctrlpipe(kingsun->usbdev, 0), + (unsigned char *)kingsun->rx_setuprequest, + kingsun->rx_buf, KINGSUN_RCV_FIFO_SIZE, + ks959_rcv_irq, kingsun); + kingsun->rx_urb->status = 0; + err = usb_submit_urb(kingsun->rx_urb, GFP_KERNEL); + if (err) { + err("ks959-sir: first urb-submit failed: %d", err); + goto close_irlap; + } + + netif_start_queue(netdev); + + /* Situation at this point: + - all work buffers allocated + - urbs allocated and ready to fill + - max rx packet known (in max_rx) + - unwrap state machine initialized, in state outside of any frame + - receive request in progress + - IrLAP layer started, about to hand over packets to send + */ + + return 0; + + close_irlap: + irlap_close(kingsun->irlap); + free_mem: + usb_free_urb(kingsun->speed_urb); + kingsun->speed_urb = NULL; + usb_free_urb(kingsun->tx_urb); + kingsun->tx_urb = NULL; + usb_free_urb(kingsun->rx_urb); + kingsun->rx_urb = NULL; + if (kingsun->rx_unwrap_buff.skb) { + kfree_skb(kingsun->rx_unwrap_buff.skb); + kingsun->rx_unwrap_buff.skb = NULL; + kingsun->rx_unwrap_buff.head = NULL; + } + return err; +} + +/* + * Function kingsun_net_close (kingsun) + * + * Network device is taken down. 
Usually this is done by + * "ifconfig irda0 down" + */ +static int ks959_net_close(struct net_device *netdev) +{ + struct ks959_cb *kingsun = netdev_priv(netdev); + + /* Stop transmit processing */ + netif_stop_queue(netdev); + + /* Mop up receive && transmit urb's */ + usb_kill_urb(kingsun->tx_urb); + usb_free_urb(kingsun->tx_urb); + kingsun->tx_urb = NULL; + + usb_kill_urb(kingsun->speed_urb); + usb_free_urb(kingsun->speed_urb); + kingsun->speed_urb = NULL; + + usb_kill_urb(kingsun->rx_urb); + usb_free_urb(kingsun->rx_urb); + kingsun->rx_urb = NULL; + + kfree_skb(kingsun->rx_unwrap_buff.skb); + kingsun->rx_unwrap_buff.skb = NULL; + kingsun->rx_unwrap_buff.head = NULL; + kingsun->rx_unwrap_buff.in_frame = FALSE; + kingsun->rx_unwrap_buff.state = OUTSIDE_FRAME; + kingsun->receiving = 0; + + /* Stop and remove instance of IrLAP */ + if (kingsun->irlap) + irlap_close(kingsun->irlap); + + kingsun->irlap = NULL; + + return 0; +} + +/* + * IOCTLs : Extra out-of-band network commands... + */ +static int ks959_net_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) +{ + struct if_irda_req *irq = (struct if_irda_req *)rq; + struct ks959_cb *kingsun = netdev_priv(netdev); + int ret = 0; + + switch (cmd) { + case SIOCSBANDWIDTH: /* Set bandwidth */ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + /* Check if the device is still there */ + if (netif_device_present(kingsun->netdev)) + return ks959_change_speed(kingsun, irq->ifr_baudrate); + break; + + case SIOCSMEDIABUSY: /* Set media busy */ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + /* Check if the IrDA stack is still there */ + if (netif_running(kingsun->netdev)) + irda_device_set_media_busy(kingsun->netdev, TRUE); + break; + + case SIOCGRECEIVING: + /* Only approximately true */ + irq->ifr_receiving = kingsun->receiving; + break; + + default: + ret = -EOPNOTSUPP; + } + + return ret; +} + +/* + * Get device stats (for /proc/net/dev and ifconfig) + */ +static struct net_device_stats 
*ks959_net_get_stats(struct net_device *netdev) +{ + struct ks959_cb *kingsun = netdev_priv(netdev); + return &kingsun->stats; +} + +/* + * This routine is called by the USB subsystem for each new device + * in the system. We need to check if the device is ours, and in + * this case start handling it. + */ +static int ks959_probe(struct usb_interface *intf, + const struct usb_device_id *id) +{ + struct usb_device *dev = interface_to_usbdev(intf); + struct ks959_cb *kingsun = NULL; + struct net_device *net = NULL; + int ret = -ENOMEM; + + /* Allocate network device container. */ + net = alloc_irdadev(sizeof(*kingsun)); + if (!net) + goto err_out1; + + SET_MODULE_OWNER(net); + SET_NETDEV_DEV(net, &intf->dev); + kingsun = netdev_priv(net); + kingsun->netdev = net; + kingsun->usbdev = dev; + kingsun->irlap = NULL; + kingsun->tx_setuprequest = NULL; + kingsun->tx_urb = NULL; + kingsun->tx_buf_clear = NULL; + kingsun->tx_buf_xored = NULL; + kingsun->tx_buf_clear_used = 0; + kingsun->tx_buf_clear_sent = 0; + + kingsun->rx_setuprequest = NULL; + kingsun->rx_urb = NULL; + kingsun->rx_buf = NULL; + kingsun->rx_variable_xormask = 0; + kingsun->rx_unwrap_buff.in_frame = FALSE; + kingsun->rx_unwrap_buff.state = OUTSIDE_FRAME; + kingsun->rx_unwrap_buff.skb = NULL; + kingsun->receiving = 0; + spin_lock_init(&kingsun->lock); + + kingsun->speed_setuprequest = NULL; + kingsun->speed_urb = NULL; + kingsun->speedparams.baudrate = 0; + + /* Allocate input buffer */ + kingsun->rx_buf = kmalloc(KINGSUN_RCV_FIFO_SIZE, GFP_KERNEL); + if (!kingsun->rx_buf) + goto free_mem; + + /* Allocate input setup packet */ + kingsun->rx_setuprequest = + kmalloc(sizeof(struct usb_ctrlrequest), GFP_KERNEL); + if (!kingsun->rx_setuprequest) + goto free_mem; + kingsun->rx_setuprequest->bRequestType = + USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE; + kingsun->rx_setuprequest->bRequest = KINGSUN_REQ_RECV; + kingsun->rx_setuprequest->wValue = cpu_to_le16(0x0200); + kingsun->rx_setuprequest->wIndex = 0; + 
kingsun->rx_setuprequest->wLength = cpu_to_le16(KINGSUN_RCV_FIFO_SIZE); + + /* Allocate output buffer */ + kingsun->tx_buf_clear = kmalloc(KINGSUN_SND_FIFO_SIZE, GFP_KERNEL); + if (!kingsun->tx_buf_clear) + goto free_mem; + kingsun->tx_buf_xored = kmalloc(KINGSUN_SND_PACKET_SIZE, GFP_KERNEL); + if (!kingsun->tx_buf_xored) + goto free_mem; + + /* Allocate and initialize output setup packet */ + kingsun->tx_setuprequest = + kmalloc(sizeof(struct usb_ctrlrequest), GFP_KERNEL); + if (!kingsun->tx_setuprequest) + goto free_mem; + kingsun->tx_setuprequest->bRequestType = + USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE; + kingsun->tx_setuprequest->bRequest = KINGSUN_REQ_SEND; + kingsun->tx_setuprequest->wValue = 0; + kingsun->tx_setuprequest->wIndex = 0; + kingsun->tx_setuprequest->wLength = 0; + + /* Allocate and initialize speed setup packet */ + kingsun->speed_setuprequest = + kmalloc(sizeof(struct usb_ctrlrequest), GFP_KERNEL); + if (!kingsun->speed_setuprequest) + goto free_mem; + kingsun->speed_setuprequest->bRequestType = + USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE; + kingsun->speed_setuprequest->bRequest = KINGSUN_REQ_SEND; + kingsun->speed_setuprequest->wValue = cpu_to_le16(0x0200); + kingsun->speed_setuprequest->wIndex = cpu_to_le16(0x0001); + kingsun->speed_setuprequest->wLength = + cpu_to_le16(sizeof(struct ks959_speedparams)); + + printk(KERN_INFO "KingSun KS-959 IRDA/USB found at address %d, " + "Vendor: %x, Product: %x\n", + dev->devnum, le16_to_cpu(dev->descriptor.idVendor), + le16_to_cpu(dev->descriptor.idProduct)); + + /* Initialize QoS for this device */ + irda_init_max_qos_capabilies(&kingsun->qos); + + /* Baud rates known to be supported. Please uncomment if devices (other + than a SonyEriccson K300 phone) can be shown to support higher speed + with this dongle. 
+ */ + kingsun->qos.baud_rate.bits = + IR_2400 | IR_9600 | IR_19200 | IR_38400 | IR_57600; + kingsun->qos.min_turn_time.bits &= KINGSUN_MTT; + irda_qos_bits_to_value(&kingsun->qos); + + /* Override the network functions we need to use */ + net->hard_start_xmit = ks959_hard_xmit; + net->open = ks959_net_open; + net->stop = ks959_net_close; + net->get_stats = ks959_net_get_stats; + net->do_ioctl = ks959_net_ioctl; + + ret = register_netdev(net); + if (ret != 0) + goto free_mem; + + info("IrDA: Registered KingSun KS-959 device %s", net->name); + + usb_set_intfdata(intf, kingsun); + + /* Situation at this point: + - all work buffers allocated + - setup requests pre-filled + - urbs not allocated, set to NULL + - max rx packet known (is KINGSUN_FIFO_SIZE) + - unwrap state machine (partially) initialized, but skb == NULL + */ + + return 0; + + free_mem: + kfree(kingsun->speed_setuprequest); + kfree(kingsun->tx_setuprequest); + kfree(kingsun->tx_buf_xored); + kfree(kingsun->tx_buf_clear); + kfree(kingsun->rx_setuprequest); + kfree(kingsun->rx_buf); + free_netdev(net); + err_out1: + return ret; +} + +/* + * The current device is removed, the USB layer tell us to shut it down... 
+ */ +static void ks959_disconnect(struct usb_interface *intf) +{ + struct ks959_cb *kingsun = usb_get_intfdata(intf); + + if (!kingsun) + return; + + unregister_netdev(kingsun->netdev); + + /* Mop up receive && transmit urb's */ + if (kingsun->speed_urb != NULL) { + usb_kill_urb(kingsun->speed_urb); + usb_free_urb(kingsun->speed_urb); + kingsun->speed_urb = NULL; + } + if (kingsun->tx_urb != NULL) { + usb_kill_urb(kingsun->tx_urb); + usb_free_urb(kingsun->tx_urb); + kingsun->tx_urb = NULL; + } + if (kingsun->rx_urb != NULL) { + usb_kill_urb(kingsun->rx_urb); + usb_free_urb(kingsun->rx_urb); + kingsun->rx_urb = NULL; + } + + kfree(kingsun->speed_setuprequest); + kfree(kingsun->tx_setuprequest); + kfree(kingsun->tx_buf_xored); + kfree(kingsun->tx_buf_clear); + kfree(kingsun->rx_setuprequest); + kfree(kingsun->rx_buf); + free_netdev(kingsun->netdev); + + usb_set_intfdata(intf, NULL); +} + +#ifdef CONFIG_PM +/* USB suspend, so power off the transmitter/receiver */ +static int ks959_suspend(struct usb_interface *intf, pm_message_t message) +{ + struct ks959_cb *kingsun = usb_get_intfdata(intf); + + netif_device_detach(kingsun->netdev); + if (kingsun->speed_urb != NULL) + usb_kill_urb(kingsun->speed_urb); + if (kingsun->tx_urb != NULL) + usb_kill_urb(kingsun->tx_urb); + if (kingsun->rx_urb != NULL) + usb_kill_urb(kingsun->rx_urb); + return 0; +} + +/* Coming out of suspend, so reset hardware */ +static int ks959_resume(struct usb_interface *intf) +{ + struct ks959_cb *kingsun = usb_get_intfdata(intf); + + if (kingsun->rx_urb != NULL) { + /* Setup request already filled in ks959_probe */ + usb_submit_urb(kingsun->rx_urb, GFP_KERNEL); + } + netif_device_attach(kingsun->netdev); + + return 0; +} +#endif + +/* + * USB device callbacks + */ +static struct usb_driver irda_driver = { + .name = "ks959-sir", + .probe = ks959_probe, + .disconnect = ks959_disconnect, + .id_table = dongles, +#ifdef CONFIG_PM + .suspend = ks959_suspend, + .resume = ks959_resume, +#endif +}; + +/* + 
* Module insertion + */ +static int __init ks959_init(void) +{ + return usb_register(&irda_driver); +} + +module_init(ks959_init); + +/* + * Module removal + */ +static void __exit ks959_cleanup(void) +{ + /* Deregister the driver and remove all pending instances */ + usb_deregister(&irda_driver); +} + +module_exit(ks959_cleanup); + +MODULE_AUTHOR("Alex Villacís Lasso "); +MODULE_DESCRIPTION("IrDA-USB Dongle Driver for KingSun KS-959"); +MODULE_LICENSE("GPL"); diff -puN /dev/null drivers/net/irda/ksdazzle-sir.c --- /dev/null +++ a/drivers/net/irda/ksdazzle-sir.c @@ -0,0 +1,823 @@ +/***************************************************************************** +* +* Filename: ksdazzle.c +* Version: 0.1.1 +* Description: Irda KingSun Dazzle USB Dongle +* Status: Experimental +* Author: Alex Villacís Lasso +* +* Based on stir4200, mcs7780, kingsun-sir drivers. +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +* +*****************************************************************************/ + +/* + * Following is my most current (2007-07-26) understanding of how the Kingsun + * 07D0:4100 dongle (sometimes known as the MA-660) is supposed to work. This + * information was deduced by examining the USB traffic captured with USBSnoopy + * from the WinXP driver. Feel free to update here as more of the dongle is + * known. 
+ * + * General: This dongle exposes one interface with two interrupt endpoints, one + * IN and one OUT. In this regard, it is similar to what the Kingsun/Donshine + * dongle (07c0:4200) exposes. Traffic is raw and needs to be wrapped and + * unwrapped manually as in stir4200, kingsun-sir, and ks959-sir. + * + * Transmission: To transmit an IrDA frame, it is necessary to wrap it, then + * split it into multiple segments of up to 7 bytes each, and transmit each in + * sequence. It seems that sending a single big block (like kingsun-sir does) + * won't work with this dongle. Each segment needs to be prefixed with a value + * equal to (unsigned char)0xF8 + (number of bytes in segment), inside a payload + * of exactly 8 bytes. For example, a segment of 1 byte gets prefixed by 0xF9, + * and one of 7 bytes gets prefixed by 0xFF. The bytes at the end of the + * payload, not considered by the prefix, are ignored (set to 0 by this + * implementation). + * + * Reception: To receive data, the driver must poll the dongle regularly (like + * kingsun-sir.c) with interrupt URBs. If data is available, it will be returned + * in payloads from 0 to 8 bytes long. 
When concatenated, these payloads form + * a raw IrDA stream that needs to be unwrapped as in stir4200 and kingsun-sir + * + * Speed change: To change the speed of the dongle, the driver prepares a + * control URB with the following as a setup packet: + * bRequestType USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE + * bRequest 0x09 + * wValue 0x0200 + * wIndex 0x0001 + * wLength 0x0008 (length of the payload) + * The payload is a 8-byte record, apparently identical to the one used in + * drivers/usb/serial/cypress_m8.c to change speed: + * __u32 baudSpeed; + * unsigned int dataBits : 2; // 0 - 5 bits 3 - 8 bits + * unsigned int : 1; + * unsigned int stopBits : 1; + * unsigned int parityEnable : 1; + * unsigned int parityType : 1; + * unsigned int : 1; + * unsigned int reset : 1; + * unsigned char reserved[3]; // set to 0 + * + * For now only SIR speeds have been observed with this dongle. Therefore, + * nothing is known on what changes (if any) must be done to frame wrapping / + * unwrapping for higher than SIR speeds. This driver assumes no change is + * necessary and announces support for all the way to 115200 bps. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#define KSDAZZLE_VENDOR_ID 0x07d0 +#define KSDAZZLE_PRODUCT_ID 0x4100 + +/* These are the currently known USB ids */ +static struct usb_device_id dongles[] = { + /* KingSun Co,Ltd IrDA/USB Bridge */ + {USB_DEVICE(KSDAZZLE_VENDOR_ID, KSDAZZLE_PRODUCT_ID)}, + {} +}; + +MODULE_DEVICE_TABLE(usb, dongles); + +#define KINGSUN_MTT 0x07 +#define KINGSUN_REQ_RECV 0x01 +#define KINGSUN_REQ_SEND 0x09 + +#define KINGSUN_SND_FIFO_SIZE 2048 /* Max packet we can send */ + +struct ksdazzle_speedparams { + __le32 baudrate; /* baud rate, little endian */ + __u8 flags; + __u8 reserved[3]; +} __attribute__ ((packed)); + +#define KS_DATA_5_BITS 0x00 +#define KS_DATA_6_BITS 0x01 +#define KS_DATA_7_BITS 0x02 +#define KS_DATA_8_BITS 0x03 + +#define KS_STOP_BITS_1 0x00 +#define KS_STOP_BITS_2 0x08 + +#define KS_PAR_DISABLE 0x00 +#define KS_PAR_EVEN 0x10 +#define KS_PAR_ODD 0x30 +#define KS_RESET 0x80 + +#define KINGSUN_EP_IN 0 +#define KINGSUN_EP_OUT 1 + +struct ksdazzle_cb { + struct usb_device *usbdev; /* init: probe_irda */ + struct net_device *netdev; /* network layer */ + struct irlap_cb *irlap; /* The link layer we are binded to */ + struct net_device_stats stats; /* network statistics */ + struct qos_info qos; + + struct urb *tx_urb; + __u8 *tx_buf_clear; + unsigned int tx_buf_clear_used; + unsigned int tx_buf_clear_sent; + __u8 tx_payload[8]; + + struct urb *rx_urb; + __u8 rx_payload[8]; + iobuff_t rx_unwrap_buff; + + struct usb_ctrlrequest *speed_setuprequest; + struct urb *speed_urb; + struct ksdazzle_speedparams speedparams; + unsigned int new_speed; + + __u8 ep_in; + __u8 ep_out; + + spinlock_t lock; + int receiving; +}; + +/* Callback transmission routine */ +static void ksdazzle_speed_irq(struct urb *urb) +{ + /* unlink, shutdown, unplug, other nasties */ + if (urb->status != 0) { + 
err("ksdazzle_speed_irq: urb asynchronously failed - %d", + urb->status); + } +} + +/* Send a control request to change speed of the dongle */ +static int ksdazzle_change_speed(struct ksdazzle_cb *kingsun, unsigned speed) +{ + static unsigned int supported_speeds[] = { 2400, 9600, 19200, 38400, + 57600, 115200, 576000, 1152000, 4000000, 0 + }; + int err; + unsigned int i; + + if (kingsun->speed_setuprequest == NULL || kingsun->speed_urb == NULL) + return -ENOMEM; + + /* Check that requested speed is among the supported ones */ + for (i = 0; supported_speeds[i] && supported_speeds[i] != speed; i++) ; + if (supported_speeds[i] == 0) + return -EOPNOTSUPP; + + memset(&(kingsun->speedparams), 0, sizeof(struct ksdazzle_speedparams)); + kingsun->speedparams.baudrate = cpu_to_le32(speed); + kingsun->speedparams.flags = KS_DATA_8_BITS; + + /* speed_setuprequest pre-filled in ksdazzle_probe */ + usb_fill_control_urb(kingsun->speed_urb, kingsun->usbdev, + usb_sndctrlpipe(kingsun->usbdev, 0), + (unsigned char *)kingsun->speed_setuprequest, + &(kingsun->speedparams), + sizeof(struct ksdazzle_speedparams), + ksdazzle_speed_irq, kingsun); + kingsun->speed_urb->status = 0; + err = usb_submit_urb(kingsun->speed_urb, GFP_ATOMIC); + + return err; +} + +/* Submit one fragment of an IrDA frame to the dongle */ +static void ksdazzle_send_irq(struct urb *urb); +static int ksdazzle_submit_tx_fragment(struct ksdazzle_cb *kingsun) +{ + unsigned int wraplen; + int ret; + + /* We can send at most 7 bytes of payload at a time */ + wraplen = 7; + if (wraplen > kingsun->tx_buf_clear_used) + wraplen = kingsun->tx_buf_clear_used; + + /* Prepare payload prefix with used length */ + memset(kingsun->tx_payload, 0, 8); + kingsun->tx_payload[0] = (unsigned char)0xf8 + wraplen; + memcpy(kingsun->tx_payload + 1, kingsun->tx_buf_clear, wraplen); + + usb_fill_int_urb(kingsun->tx_urb, kingsun->usbdev, + usb_sndintpipe(kingsun->usbdev, kingsun->ep_out), + kingsun->tx_payload, 8, ksdazzle_send_irq, kingsun, 
1); + kingsun->tx_urb->status = 0; + ret = usb_submit_urb(kingsun->tx_urb, GFP_ATOMIC); + + /* Remember how much data was sent, in order to update at callback */ + kingsun->tx_buf_clear_sent = (ret == 0) ? wraplen : 0; + return ret; +} + +/* Callback transmission routine */ +static void ksdazzle_send_irq(struct urb *urb) +{ + struct ksdazzle_cb *kingsun = urb->context; + struct net_device *netdev = kingsun->netdev; + int ret = 0; + + /* in process of stopping, just drop data */ + if (!netif_running(kingsun->netdev)) { + err("ksdazzle_send_irq: Network not running!"); + return; + } + + /* unlink, shutdown, unplug, other nasties */ + if (urb->status != 0) { + err("ksdazzle_send_irq: urb asynchronously failed - %d", + urb->status); + return; + } + + if (kingsun->tx_buf_clear_used > 0) { + /* Update data remaining to be sent */ + if (kingsun->tx_buf_clear_sent < kingsun->tx_buf_clear_used) { + memmove(kingsun->tx_buf_clear, + kingsun->tx_buf_clear + + kingsun->tx_buf_clear_sent, + kingsun->tx_buf_clear_used - + kingsun->tx_buf_clear_sent); + } + kingsun->tx_buf_clear_used -= kingsun->tx_buf_clear_sent; + kingsun->tx_buf_clear_sent = 0; + + if (kingsun->tx_buf_clear_used > 0) { + /* There is more data to be sent */ + if ((ret = ksdazzle_submit_tx_fragment(kingsun)) != 0) { + err("ksdazzle_send_irq: failed tx_urb submit: %d", ret); + switch (ret) { + case -ENODEV: + case -EPIPE: + break; + default: + kingsun->stats.tx_errors++; + netif_start_queue(netdev); + } + } + } else { + /* All data sent, send next speed && wake network queue */ + if (kingsun->new_speed != -1 && + cpu_to_le32(kingsun->new_speed) != + kingsun->speedparams.baudrate) + ksdazzle_change_speed(kingsun, + kingsun->new_speed); + + netif_wake_queue(netdev); + } + } +} + +/* + * Called from net/core when new frame is available. 
+ */ +static int ksdazzle_hard_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct ksdazzle_cb *kingsun; + unsigned int wraplen; + int ret = 0; + + if (skb == NULL || netdev == NULL) + return -EINVAL; + + netif_stop_queue(netdev); + + /* the IRDA wrapping routines don't deal with non linear skb */ + SKB_LINEAR_ASSERT(skb); + + kingsun = netdev_priv(netdev); + + spin_lock(&kingsun->lock); + kingsun->new_speed = irda_get_next_speed(skb); + + /* Append data to the end of whatever data remains to be transmitted */ + wraplen = + async_wrap_skb(skb, kingsun->tx_buf_clear, KINGSUN_SND_FIFO_SIZE); + kingsun->tx_buf_clear_used = wraplen; + + if ((ret = ksdazzle_submit_tx_fragment(kingsun)) != 0) { + err("ksdazzle_hard_xmit: failed tx_urb submit: %d", ret); + switch (ret) { + case -ENODEV: + case -EPIPE: + break; + default: + kingsun->stats.tx_errors++; + netif_start_queue(netdev); + } + } else { + kingsun->stats.tx_packets++; + kingsun->stats.tx_bytes += skb->len; + + } + + dev_kfree_skb(skb); + spin_unlock(&kingsun->lock); + + return NETDEV_TX_OK; +} + +/* Receive callback function */ +static void ksdazzle_rcv_irq(struct urb *urb) +{ + struct ksdazzle_cb *kingsun = urb->context; + + /* in process of stopping, just drop data */ + if (!netif_running(kingsun->netdev)) { + kingsun->receiving = 0; + return; + } + + /* unlink, shutdown, unplug, other nasties */ + if (urb->status != 0) { + err("ksdazzle_rcv_irq: urb asynchronously failed - %d", + urb->status); + kingsun->receiving = 0; + return; + } + + if (urb->actual_length > 0) { + __u8 *bytes = urb->transfer_buffer; + unsigned int i; + + for (i = 0; i < urb->actual_length; i++) { + async_unwrap_char(kingsun->netdev, &kingsun->stats, + &kingsun->rx_unwrap_buff, bytes[i]); + } + kingsun->netdev->last_rx = jiffies; + kingsun->receiving = + (kingsun->rx_unwrap_buff.state != OUTSIDE_FRAME) ? 1 : 0; + } + + /* This urb has already been filled in ksdazzle_net_open. 
It is assumed that + urb keeps the pointer to the payload buffer. + */ + urb->status = 0; + usb_submit_urb(urb, GFP_ATOMIC); +} + +/* + * Function ksdazzle_net_open (dev) + * + * Network device is taken up. Usually this is done by "ifconfig irda0 up" + */ +static int ksdazzle_net_open(struct net_device *netdev) +{ + struct ksdazzle_cb *kingsun = netdev_priv(netdev); + int err = -ENOMEM; + char hwname[16]; + + /* At this point, urbs are NULL, and skb is NULL (see ksdazzle_probe) */ + kingsun->receiving = 0; + + /* Initialize for SIR to copy data directly into skb. */ + kingsun->rx_unwrap_buff.in_frame = FALSE; + kingsun->rx_unwrap_buff.state = OUTSIDE_FRAME; + kingsun->rx_unwrap_buff.truesize = IRDA_SKB_MAX_MTU; + kingsun->rx_unwrap_buff.skb = dev_alloc_skb(IRDA_SKB_MAX_MTU); + if (!kingsun->rx_unwrap_buff.skb) + goto free_mem; + + skb_reserve(kingsun->rx_unwrap_buff.skb, 1); + kingsun->rx_unwrap_buff.head = kingsun->rx_unwrap_buff.skb->data; + + kingsun->rx_urb = usb_alloc_urb(0, GFP_KERNEL); + if (!kingsun->rx_urb) + goto free_mem; + + kingsun->tx_urb = usb_alloc_urb(0, GFP_KERNEL); + if (!kingsun->tx_urb) + goto free_mem; + + kingsun->speed_urb = usb_alloc_urb(0, GFP_KERNEL); + if (!kingsun->speed_urb) + goto free_mem; + + /* Initialize speed for dongle */ + kingsun->new_speed = 9600; + err = ksdazzle_change_speed(kingsun, 9600); + if (err < 0) + goto free_mem; + + /* + * Now that everything should be initialized properly, + * Open new IrLAP layer instance to take care of us... + */ + sprintf(hwname, "usb#%d", kingsun->usbdev->devnum); + kingsun->irlap = irlap_open(netdev, &kingsun->qos, hwname); + if (!kingsun->irlap) { + err = -ENOMEM; + err("ksdazzle-sir: irlap_open failed"); + goto free_mem; + } + /* Start reception. 
*/ + usb_fill_int_urb(kingsun->rx_urb, kingsun->usbdev, + usb_rcvintpipe(kingsun->usbdev, kingsun->ep_in), + kingsun->rx_payload, 8, ksdazzle_rcv_irq, kingsun, 1); + kingsun->rx_urb->status = 0; + err = usb_submit_urb(kingsun->rx_urb, GFP_KERNEL); + if (err) { + err("ksdazzle-sir: first urb-submit failed: %d", err); + goto close_irlap; + } + + netif_start_queue(netdev); + + /* Situation at this point: + - all work buffers allocated + - urbs allocated and ready to fill + - max rx packet known (in max_rx) + - unwrap state machine initialized, in state outside of any frame + - receive request in progress + - IrLAP layer started, about to hand over packets to send + */ + + return 0; + + close_irlap: + irlap_close(kingsun->irlap); + free_mem: + usb_free_urb(kingsun->speed_urb); + kingsun->speed_urb = NULL; + usb_free_urb(kingsun->tx_urb); + kingsun->tx_urb = NULL; + usb_free_urb(kingsun->rx_urb); + kingsun->rx_urb = NULL; + if (kingsun->rx_unwrap_buff.skb) { + kfree_skb(kingsun->rx_unwrap_buff.skb); + kingsun->rx_unwrap_buff.skb = NULL; + kingsun->rx_unwrap_buff.head = NULL; + } + return err; +} + +/* + * Function ksdazzle_net_close (dev) + * + * Network device is taken down. 
Usually this is done by + * "ifconfig irda0 down" + */ +static int ksdazzle_net_close(struct net_device *netdev) +{ + struct ksdazzle_cb *kingsun = netdev_priv(netdev); + + /* Stop transmit processing */ + netif_stop_queue(netdev); + + /* Mop up receive && transmit urb's */ + usb_kill_urb(kingsun->tx_urb); + usb_free_urb(kingsun->tx_urb); + kingsun->tx_urb = NULL; + + usb_kill_urb(kingsun->speed_urb); + usb_free_urb(kingsun->speed_urb); + kingsun->speed_urb = NULL; + + usb_kill_urb(kingsun->rx_urb); + usb_free_urb(kingsun->rx_urb); + kingsun->rx_urb = NULL; + + kfree_skb(kingsun->rx_unwrap_buff.skb); + kingsun->rx_unwrap_buff.skb = NULL; + kingsun->rx_unwrap_buff.head = NULL; + kingsun->rx_unwrap_buff.in_frame = FALSE; + kingsun->rx_unwrap_buff.state = OUTSIDE_FRAME; + kingsun->receiving = 0; + + /* Stop and remove instance of IrLAP */ + irlap_close(kingsun->irlap); + + kingsun->irlap = NULL; + + return 0; +} + +/* + * IOCTLs : Extra out-of-band network commands... + */ +static int ksdazzle_net_ioctl(struct net_device *netdev, struct ifreq *rq, + int cmd) +{ + struct if_irda_req *irq = (struct if_irda_req *)rq; + struct ksdazzle_cb *kingsun = netdev_priv(netdev); + int ret = 0; + + switch (cmd) { + case SIOCSBANDWIDTH: /* Set bandwidth */ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + /* Check if the device is still there */ + if (netif_device_present(kingsun->netdev)) + return ksdazzle_change_speed(kingsun, + irq->ifr_baudrate); + break; + + case SIOCSMEDIABUSY: /* Set media busy */ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + /* Check if the IrDA stack is still there */ + if (netif_running(kingsun->netdev)) + irda_device_set_media_busy(kingsun->netdev, TRUE); + break; + + case SIOCGRECEIVING: + /* Only approximately true */ + irq->ifr_receiving = kingsun->receiving; + break; + + default: + ret = -EOPNOTSUPP; + } + + return ret; +} + +/* + * Get device stats (for /proc/net/dev and ifconfig) + */ +static struct net_device_stats 
*ksdazzle_net_get_stats(struct net_device + *netdev) +{ + struct ksdazzle_cb *kingsun = netdev_priv(netdev); + return &kingsun->stats; +} + +/* + * This routine is called by the USB subsystem for each new device + * in the system. We need to check if the device is ours, and in + * this case start handling it. + */ +static int ksdazzle_probe(struct usb_interface *intf, + const struct usb_device_id *id) +{ + struct usb_host_interface *interface; + struct usb_endpoint_descriptor *endpoint; + + struct usb_device *dev = interface_to_usbdev(intf); + struct ksdazzle_cb *kingsun = NULL; + struct net_device *net = NULL; + int ret = -ENOMEM; + int pipe, maxp_in, maxp_out; + __u8 ep_in; + __u8 ep_out; + + /* Check that there really are two interrupt endpoints. Check based on the + one in drivers/usb/input/usbmouse.c + */ + interface = intf->cur_altsetting; + if (interface->desc.bNumEndpoints != 2) { + err("ksdazzle: expected 2 endpoints, found %d", + interface->desc.bNumEndpoints); + return -ENODEV; + } + endpoint = &interface->endpoint[KINGSUN_EP_IN].desc; + if (!usb_endpoint_is_int_in(endpoint)) { + err("ksdazzle: endpoint 0 is not interrupt IN"); + return -ENODEV; + } + + ep_in = endpoint->bEndpointAddress; + pipe = usb_rcvintpipe(dev, ep_in); + maxp_in = usb_maxpacket(dev, pipe, usb_pipeout(pipe)); + if (maxp_in > 255 || maxp_in <= 1) { + err("ksdazzle: endpoint 0 has max packet size %d not in range [2..255]", maxp_in); + return -ENODEV; + } + + endpoint = &interface->endpoint[KINGSUN_EP_OUT].desc; + if (!usb_endpoint_is_int_out(endpoint)) { + err("ksdazzle: endpoint 1 is not interrupt OUT"); + return -ENODEV; + } + + ep_out = endpoint->bEndpointAddress; + pipe = usb_sndintpipe(dev, ep_out); + maxp_out = usb_maxpacket(dev, pipe, usb_pipeout(pipe)); + + /* Allocate network device container. 
*/ + net = alloc_irdadev(sizeof(*kingsun)); + if (!net) + goto err_out1; + + SET_MODULE_OWNER(net); + SET_NETDEV_DEV(net, &intf->dev); + kingsun = netdev_priv(net); + kingsun->netdev = net; + kingsun->usbdev = dev; + kingsun->ep_in = ep_in; + kingsun->ep_out = ep_out; + kingsun->irlap = NULL; + kingsun->tx_urb = NULL; + kingsun->tx_buf_clear = NULL; + kingsun->tx_buf_clear_used = 0; + kingsun->tx_buf_clear_sent = 0; + + kingsun->rx_urb = NULL; + kingsun->rx_unwrap_buff.in_frame = FALSE; + kingsun->rx_unwrap_buff.state = OUTSIDE_FRAME; + kingsun->rx_unwrap_buff.skb = NULL; + kingsun->receiving = 0; + spin_lock_init(&kingsun->lock); + + kingsun->speed_setuprequest = NULL; + kingsun->speed_urb = NULL; + kingsun->speedparams.baudrate = 0; + + /* Allocate output buffer */ + kingsun->tx_buf_clear = kmalloc(KINGSUN_SND_FIFO_SIZE, GFP_KERNEL); + if (!kingsun->tx_buf_clear) + goto free_mem; + + /* Allocate and initialize speed setup packet */ + kingsun->speed_setuprequest = + kmalloc(sizeof(struct usb_ctrlrequest), GFP_KERNEL); + if (!kingsun->speed_setuprequest) + goto free_mem; + kingsun->speed_setuprequest->bRequestType = + USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE; + kingsun->speed_setuprequest->bRequest = KINGSUN_REQ_SEND; + kingsun->speed_setuprequest->wValue = cpu_to_le16(0x0200); + kingsun->speed_setuprequest->wIndex = cpu_to_le16(0x0001); + kingsun->speed_setuprequest->wLength = + cpu_to_le16(sizeof(struct ksdazzle_speedparams)); + + printk(KERN_INFO "KingSun/Dazzle IRDA/USB found at address %d, " + "Vendor: %x, Product: %x\n", + dev->devnum, le16_to_cpu(dev->descriptor.idVendor), + le16_to_cpu(dev->descriptor.idProduct)); + + /* Initialize QoS for this device */ + irda_init_max_qos_capabilies(&kingsun->qos); + + /* Baud rates known to be supported. Please uncomment if devices (other + than a SonyEriccson K300 phone) can be shown to support higher speeds + with this dongle. 
+ */ + kingsun->qos.baud_rate.bits = + IR_2400 | IR_9600 | IR_19200 | IR_38400 | IR_57600 | IR_115200; + kingsun->qos.min_turn_time.bits &= KINGSUN_MTT; + irda_qos_bits_to_value(&kingsun->qos); + + /* Override the network functions we need to use */ + net->hard_start_xmit = ksdazzle_hard_xmit; + net->open = ksdazzle_net_open; + net->stop = ksdazzle_net_close; + net->get_stats = ksdazzle_net_get_stats; + net->do_ioctl = ksdazzle_net_ioctl; + + ret = register_netdev(net); + if (ret != 0) + goto free_mem; + + info("IrDA: Registered KingSun/Dazzle device %s", net->name); + + usb_set_intfdata(intf, kingsun); + + /* Situation at this point: + - all work buffers allocated + - setup requests pre-filled + - urbs not allocated, set to NULL + - max rx packet known (is KINGSUN_FIFO_SIZE) + - unwrap state machine (partially) initialized, but skb == NULL + */ + + return 0; + + free_mem: + kfree(kingsun->speed_setuprequest); + kfree(kingsun->tx_buf_clear); + free_netdev(net); + err_out1: + return ret; +} + +/* + * The current device is removed, the USB layer tell us to shut it down... 
+ */ +static void ksdazzle_disconnect(struct usb_interface *intf) +{ + struct ksdazzle_cb *kingsun = usb_get_intfdata(intf); + + if (!kingsun) + return; + + unregister_netdev(kingsun->netdev); + + /* Mop up receive && transmit urb's */ + usb_kill_urb(kingsun->speed_urb); + usb_free_urb(kingsun->speed_urb); + kingsun->speed_urb = NULL; + + usb_kill_urb(kingsun->tx_urb); + usb_free_urb(kingsun->tx_urb); + kingsun->tx_urb = NULL; + + usb_kill_urb(kingsun->rx_urb); + usb_free_urb(kingsun->rx_urb); + kingsun->rx_urb = NULL; + + kfree(kingsun->speed_setuprequest); + kfree(kingsun->tx_buf_clear); + free_netdev(kingsun->netdev); + + usb_set_intfdata(intf, NULL); +} + +#ifdef CONFIG_PM +/* USB suspend, so power off the transmitter/receiver */ +static int ksdazzle_suspend(struct usb_interface *intf, pm_message_t message) +{ + struct ksdazzle_cb *kingsun = usb_get_intfdata(intf); + + netif_device_detach(kingsun->netdev); + if (kingsun->speed_urb != NULL) + usb_kill_urb(kingsun->speed_urb); + if (kingsun->tx_urb != NULL) + usb_kill_urb(kingsun->tx_urb); + if (kingsun->rx_urb != NULL) + usb_kill_urb(kingsun->rx_urb); + return 0; +} + +/* Coming out of suspend, so reset hardware */ +static int ksdazzle_resume(struct usb_interface *intf) +{ + struct ksdazzle_cb *kingsun = usb_get_intfdata(intf); + + if (kingsun->rx_urb != NULL) { + /* Setup request already filled in ksdazzle_probe */ + usb_submit_urb(kingsun->rx_urb, GFP_KERNEL); + } + netif_device_attach(kingsun->netdev); + + return 0; +} +#endif + +/* + * USB device callbacks + */ +static struct usb_driver irda_driver = { + .name = "ksdazzle-sir", + .probe = ksdazzle_probe, + .disconnect = ksdazzle_disconnect, + .id_table = dongles, +#ifdef CONFIG_PM + .suspend = ksdazzle_suspend, + .resume = ksdazzle_resume, +#endif +}; + +/* + * Module insertion + */ +static int __init ksdazzle_init(void) +{ + return usb_register(&irda_driver); +} + +module_init(ksdazzle_init); + +/* + * Module removal + */ +static void __exit 
ksdazzle_cleanup(void) +{ + /* Deregister the driver and remove all pending instances */ + usb_deregister(&irda_driver); +} + +module_exit(ksdazzle_cleanup); + +MODULE_AUTHOR("Alex Villacís Lasso "); +MODULE_DESCRIPTION("IrDA-USB Dongle Driver for KingSun Dazzle"); +MODULE_LICENSE("GPL"); diff -puN drivers/net/ixgb/ixgb.h~git-net drivers/net/ixgb/ixgb.h --- a/drivers/net/ixgb/ixgb.h~git-net +++ a/drivers/net/ixgb/ixgb.h @@ -184,6 +184,7 @@ struct ixgb_adapter { boolean_t rx_csum; /* OS defined structs */ + struct napi_struct napi; struct net_device *netdev; struct pci_dev *pdev; struct net_device_stats net_stats; diff -puN drivers/net/ixgb/ixgb_main.c~git-net drivers/net/ixgb/ixgb_main.c --- a/drivers/net/ixgb/ixgb_main.c~git-net +++ a/drivers/net/ixgb/ixgb_main.c @@ -97,7 +97,7 @@ static irqreturn_t ixgb_intr(int irq, vo static boolean_t ixgb_clean_tx_irq(struct ixgb_adapter *adapter); #ifdef CONFIG_IXGB_NAPI -static int ixgb_clean(struct net_device *netdev, int *budget); +static int ixgb_clean(struct napi_struct *napi, int budget); static boolean_t ixgb_clean_rx_irq(struct ixgb_adapter *adapter, int *work_done, int work_to_do); #else @@ -288,7 +288,7 @@ ixgb_up(struct ixgb_adapter *adapter) mod_timer(&adapter->watchdog_timer, jiffies); #ifdef CONFIG_IXGB_NAPI - netif_poll_enable(netdev); + napi_enable(&adapter->napi); #endif ixgb_irq_enable(adapter); @@ -309,7 +309,7 @@ ixgb_down(struct ixgb_adapter *adapter, if(kill_watchdog) del_timer_sync(&adapter->watchdog_timer); #ifdef CONFIG_IXGB_NAPI - netif_poll_disable(netdev); + napi_disable(&adapter->napi); #endif adapter->link_speed = 0; adapter->link_duplex = 0; @@ -421,8 +421,7 @@ ixgb_probe(struct pci_dev *pdev, netdev->tx_timeout = &ixgb_tx_timeout; netdev->watchdog_timeo = 5 * HZ; #ifdef CONFIG_IXGB_NAPI - netdev->poll = &ixgb_clean; - netdev->weight = 64; + netif_napi_add(netdev, &adapter->napi, ixgb_clean, 64); #endif netdev->vlan_rx_register = ixgb_vlan_rx_register; netdev->vlan_rx_add_vid = 
ixgb_vlan_rx_add_vid; @@ -1746,7 +1745,7 @@ ixgb_intr(int irq, void *data) } #ifdef CONFIG_IXGB_NAPI - if(netif_rx_schedule_prep(netdev)) { + if (netif_rx_schedule_prep(netdev, &adapter->napi)) { /* Disable interrupts and register for poll. The flush of the posted write is intentionally left out. @@ -1754,7 +1753,7 @@ ixgb_intr(int irq, void *data) atomic_inc(&adapter->irq_sem); IXGB_WRITE_REG(&adapter->hw, IMC, ~0); - __netif_rx_schedule(netdev); + __netif_rx_schedule(netdev, &adapter->napi); } #else /* yes, that is actually a & and it is meant to make sure that @@ -1776,27 +1775,23 @@ ixgb_intr(int irq, void *data) **/ static int -ixgb_clean(struct net_device *netdev, int *budget) +ixgb_clean(struct napi_struct *napi, int budget) { - struct ixgb_adapter *adapter = netdev_priv(netdev); - int work_to_do = min(*budget, netdev->quota); + struct ixgb_adapter *adapter = container_of(napi, struct ixgb_adapter, napi); + struct net_device *netdev = adapter->netdev; int tx_cleaned; int work_done = 0; tx_cleaned = ixgb_clean_tx_irq(adapter); - ixgb_clean_rx_irq(adapter, &work_done, work_to_do); - - *budget -= work_done; - netdev->quota -= work_done; + ixgb_clean_rx_irq(adapter, &work_done, budget); /* if no Tx and not enough Rx work done, exit the polling mode */ if((!tx_cleaned && (work_done == 0)) || !netif_running(netdev)) { - netif_rx_complete(netdev); + netif_rx_complete(netdev, napi); ixgb_irq_enable(adapter); - return 0; } - return 1; + return work_done; } #endif diff -puN drivers/net/ixp2000/ixpdev.c~git-net drivers/net/ixp2000/ixpdev.c --- a/drivers/net/ixp2000/ixpdev.c~git-net +++ a/drivers/net/ixp2000/ixpdev.c @@ -74,9 +74,9 @@ static int ixpdev_xmit(struct sk_buff *s } -static int ixpdev_rx(struct net_device *dev, int *budget) +static int ixpdev_rx(struct net_device *dev, int processed, int budget) { - while (*budget > 0) { + while (processed < budget) { struct ixpdev_rx_desc *desc; struct sk_buff *skb; void *buf; @@ -122,29 +122,34 @@ static int 
ixpdev_rx(struct net_device * err: ixp2000_reg_write(RING_RX_PENDING, _desc); - dev->quota--; - (*budget)--; + processed++; } - return 1; + return processed; } /* dev always points to nds[0]. */ -static int ixpdev_poll(struct net_device *dev, int *budget) +static int ixpdev_poll(struct napi_struct *napi, int budget) { + struct ixpdev_priv *ip = container_of(napi, struct ixpdev_priv, napi); + struct net_device *dev = ip->dev; + int rx; + /* @@@ Have to stop polling when nds[0] is administratively * downed while we are polling. */ + rx = 0; do { ixp2000_reg_write(IXP2000_IRQ_THD_RAW_STATUS_A_0, 0x00ff); - if (ixpdev_rx(dev, budget)) - return 1; + rx = ixpdev_rx(dev, rx, budget); + if (rx >= budget) + return rx; } while (ixp2000_reg_read(IXP2000_IRQ_THD_RAW_STATUS_A_0) & 0x00ff); - netif_rx_complete(dev); + netif_rx_complete(dev, napi); ixp2000_reg_write(IXP2000_IRQ_THD_ENABLE_SET_A_0, 0x00ff); - return 0; + return rx; } static void ixpdev_tx_complete(void) @@ -199,9 +204,12 @@ static irqreturn_t ixpdev_interrupt(int * Any of the eight receive units signaled RX? 
*/ if (status & 0x00ff) { + struct net_device *dev = nds[0]; + struct ixpdev_priv *ip = netdev_priv(dev); + ixp2000_reg_wrb(IXP2000_IRQ_THD_ENABLE_CLEAR_A_0, 0x00ff); - if (likely(__netif_rx_schedule_prep(nds[0]))) { - __netif_rx_schedule(nds[0]); + if (likely(napi_schedule_prep(&ip->napi))) { + __netif_rx_schedule(dev, &ip->napi); } else { printk(KERN_CRIT "ixp2000: irq while polling!!\n"); } @@ -232,11 +240,13 @@ static int ixpdev_open(struct net_device struct ixpdev_priv *ip = netdev_priv(dev); int err; + napi_enable(&ip->napi); if (!nds_open++) { err = request_irq(IRQ_IXP2000_THDA0, ixpdev_interrupt, IRQF_SHARED, "ixp2000_eth", nds); if (err) { nds_open--; + napi_disable(&ip->napi); return err; } @@ -254,6 +264,7 @@ static int ixpdev_close(struct net_devic struct ixpdev_priv *ip = netdev_priv(dev); netif_stop_queue(dev); + napi_disable(&ip->napi); set_port_admin_status(ip->channel, 0); if (!--nds_open) { @@ -274,7 +285,6 @@ struct net_device *ixpdev_alloc(int chan return NULL; dev->hard_start_xmit = ixpdev_xmit; - dev->poll = ixpdev_poll; dev->open = ixpdev_open; dev->stop = ixpdev_close; #ifdef CONFIG_NET_POLL_CONTROLLER @@ -282,9 +292,10 @@ struct net_device *ixpdev_alloc(int chan #endif dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; - dev->weight = 64; ip = netdev_priv(dev); + ip->dev = dev; + netif_napi_add(dev, &ip->napi, ixpdev_poll, 64); ip->channel = channel; ip->tx_queue_entries = 0; diff -puN drivers/net/ixp2000/ixpdev.h~git-net drivers/net/ixp2000/ixpdev.h --- a/drivers/net/ixp2000/ixpdev.h~git-net +++ a/drivers/net/ixp2000/ixpdev.h @@ -14,6 +14,8 @@ struct ixpdev_priv { + struct net_device *dev; + struct napi_struct napi; int channel; int tx_queue_entries; }; diff -puN drivers/net/macb.c~git-net drivers/net/macb.c --- a/drivers/net/macb.c~git-net +++ a/drivers/net/macb.c @@ -470,22 +470,24 @@ static int macb_rx(struct macb *bp, int return received; } -static int macb_poll(struct net_device *dev, int *budget) +static int macb_poll(struct napi_struct 
*napi, int budget) { - struct macb *bp = netdev_priv(dev); - int orig_budget, work_done, retval = 0; + struct macb *bp = container_of(napi, struct macb, napi); + struct net_device *dev = bp->dev; + int work_done; u32 status; status = macb_readl(bp, RSR); macb_writel(bp, RSR, status); + work_done = 0; if (!status) { /* * This may happen if an interrupt was pending before * this function was called last time, and no packets * have been received since. */ - netif_rx_complete(dev); + netif_rx_complete(dev, napi); goto out; } @@ -496,21 +498,13 @@ static int macb_poll(struct net_device * dev_warn(&bp->pdev->dev, "No RX buffers complete, status = %02lx\n", (unsigned long)status); - netif_rx_complete(dev); + netif_rx_complete(dev, napi); goto out; } - orig_budget = *budget; - if (orig_budget > dev->quota) - orig_budget = dev->quota; - - work_done = macb_rx(bp, orig_budget); - if (work_done < orig_budget) { - netif_rx_complete(dev); - retval = 0; - } else { - retval = 1; - } + work_done = macb_rx(bp, budget); + if (work_done < budget) + netif_rx_complete(dev, napi); /* * We've done what we can to clean the buffers. 
Make sure we @@ -521,7 +515,7 @@ out: /* TODO: Handle errors */ - return retval; + return work_done; } static irqreturn_t macb_interrupt(int irq, void *dev_id) @@ -545,7 +539,7 @@ static irqreturn_t macb_interrupt(int ir } if (status & MACB_RX_INT_FLAGS) { - if (netif_rx_schedule_prep(dev)) { + if (netif_rx_schedule_prep(dev, &bp->napi)) { /* * There's no point taking any more interrupts * until we have processed the buffers @@ -553,7 +547,7 @@ static irqreturn_t macb_interrupt(int ir macb_writel(bp, IDR, MACB_RX_INT_FLAGS); dev_dbg(&bp->pdev->dev, "scheduling RX softirq\n"); - __netif_rx_schedule(dev); + __netif_rx_schedule(dev, &bp->napi); } } @@ -937,6 +931,8 @@ static int macb_open(struct net_device * return err; } + napi_enable(&bp->napi); + macb_init_rings(bp); macb_init_hw(bp); @@ -954,6 +950,7 @@ static int macb_close(struct net_device unsigned long flags; netif_stop_queue(dev); + napi_disable(&bp->napi); if (bp->phy_dev) phy_stop(bp->phy_dev); @@ -1146,8 +1143,7 @@ static int __devinit macb_probe(struct p dev->get_stats = macb_get_stats; dev->set_multicast_list = macb_set_rx_mode; dev->do_ioctl = macb_ioctl; - dev->poll = macb_poll; - dev->weight = 64; + netif_napi_add(dev, &bp->napi, macb_poll, 64); dev->ethtool_ops = &macb_ethtool_ops; dev->base_addr = regs->start; diff -puN drivers/net/macb.h~git-net drivers/net/macb.h --- a/drivers/net/macb.h~git-net +++ a/drivers/net/macb.h @@ -374,6 +374,7 @@ struct macb { struct clk *pclk; struct clk *hclk; struct net_device *dev; + struct napi_struct napi; struct net_device_stats stats; struct macb_stats hw_stats; diff -puN drivers/net/mv643xx_eth.c~git-net drivers/net/mv643xx_eth.c --- a/drivers/net/mv643xx_eth.c~git-net +++ a/drivers/net/mv643xx_eth.c @@ -66,7 +66,7 @@ static int mv643xx_eth_change_mtu(struct static struct net_device_stats *mv643xx_eth_get_stats(struct net_device *); static void eth_port_init_mac_tables(unsigned int eth_port_num); #ifdef MV643XX_NAPI -static int mv643xx_poll(struct net_device 
*dev, int *budget); +static int mv643xx_poll(struct napi_struct *napi, int budget); #endif static int ethernet_phy_get(unsigned int eth_port_num); static void ethernet_phy_set(unsigned int eth_port_num, int phy_addr); @@ -562,7 +562,7 @@ static irqreturn_t mv643xx_eth_int_handl /* wait for previous write to complete */ mv_read(MV643XX_ETH_INTERRUPT_MASK_REG(port_num)); - netif_rx_schedule(dev); + netif_rx_schedule(dev, &mp->napi); } #else if (eth_int_cause & ETH_INT_CAUSE_RX) @@ -880,6 +880,10 @@ static int mv643xx_eth_open(struct net_d mv643xx_eth_rx_refill_descs(dev); /* Fill RX ring with skb's */ +#ifdef MV643XX_NAPI + napi_enable(&mp->napi); +#endif + eth_port_start(dev); /* Interrupt Coalescing */ @@ -982,7 +986,7 @@ static int mv643xx_eth_stop(struct net_d mv_read(MV643XX_ETH_INTERRUPT_MASK_REG(port_num)); #ifdef MV643XX_NAPI - netif_poll_disable(dev); + napi_disable(&mp->napi); #endif netif_carrier_off(dev); netif_stop_queue(dev); @@ -992,10 +996,6 @@ static int mv643xx_eth_stop(struct net_d mv643xx_eth_free_tx_rings(dev); mv643xx_eth_free_rx_rings(dev); -#ifdef MV643XX_NAPI - netif_poll_enable(dev); -#endif - free_irq(dev->irq, dev); return 0; @@ -1007,11 +1007,12 @@ static int mv643xx_eth_stop(struct net_d * * This function is used in case of NAPI */ -static int mv643xx_poll(struct net_device *dev, int *budget) +static int mv643xx_poll(struct napi_struct *napi, int budget) { - struct mv643xx_private *mp = netdev_priv(dev); - int done = 1, orig_budget, work_done; + struct mv643xx_private *mp = container_of(napi, struct mv643xx_private, napi); + struct net_device *dev = mp->dev; unsigned int port_num = mp->port_num; + int work_done; #ifdef MV643XX_TX_FAST_REFILL if (++mp->tx_clean_threshold > 5) { @@ -1020,27 +1021,20 @@ static int mv643xx_poll(struct net_devic } #endif + work_done = 0; if ((mv_read(MV643XX_ETH_RX_CURRENT_QUEUE_DESC_PTR_0(port_num))) - != (u32) mp->rx_used_desc_q) { - orig_budget = *budget; - if (orig_budget > dev->quota) - orig_budget =
dev->quota; - work_done = mv643xx_eth_receive_queue(dev, orig_budget); - *budget -= work_done; - dev->quota -= work_done; - if (work_done >= orig_budget) - done = 0; - } + != (u32) mp->rx_used_desc_q) + work_done = mv643xx_eth_receive_queue(dev, budget); - if (done) { - netif_rx_complete(dev); + if (work_done < budget) { + netif_rx_complete(dev, napi); mv_write(MV643XX_ETH_INTERRUPT_CAUSE_REG(port_num), 0); mv_write(MV643XX_ETH_INTERRUPT_CAUSE_EXTEND_REG(port_num), 0); mv_write(MV643XX_ETH_INTERRUPT_MASK_REG(port_num), ETH_INT_UNMASK_ALL); } - return done ? 0 : 1; + return work_done; } #endif @@ -1333,6 +1327,10 @@ static int mv643xx_eth_probe(struct plat platform_set_drvdata(pdev, dev); mp = netdev_priv(dev); + mp->dev = dev; +#ifdef MV643XX_NAPI + netif_napi_add(dev, &mp->napi, mv643xx_poll, 64); +#endif res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); BUG_ON(!res); @@ -1347,10 +1345,6 @@ static int mv643xx_eth_probe(struct plat /* No need to Tx Timeout */ dev->tx_timeout = mv643xx_eth_tx_timeout; -#ifdef MV643XX_NAPI - dev->poll = mv643xx_poll; - dev->weight = 64; -#endif #ifdef CONFIG_NET_POLL_CONTROLLER dev->poll_controller = mv643xx_netpoll; diff -puN drivers/net/mv643xx_eth.h~git-net drivers/net/mv643xx_eth.h --- a/drivers/net/mv643xx_eth.h~git-net +++ a/drivers/net/mv643xx_eth.h @@ -318,6 +318,8 @@ struct mv643xx_private { struct work_struct tx_timeout_task; + struct net_device *dev; + struct napi_struct napi; struct net_device_stats stats; struct mv643xx_mib_counters mib_counters; spinlock_t lock; diff -puN drivers/net/myri10ge/myri10ge.c~git-net drivers/net/myri10ge/myri10ge.c --- a/drivers/net/myri10ge/myri10ge.c~git-net +++ a/drivers/net/myri10ge/myri10ge.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -62,6 +63,8 @@ #include #include #include +#include +#include #include #include #include @@ -89,6 +92,8 @@ MODULE_LICENSE("Dual BSD/GPL"); #define MYRI10GE_EEPROM_STRINGS_SIZE 256 #define 
MYRI10GE_MAX_SEND_DESC_TSO ((65536 / 2048) * 2) +#define MYRI10GE_MAX_LRO_DESCRIPTORS 8 +#define MYRI10GE_LRO_MAX_PKTS 64 #define MYRI10GE_NO_CONFIRM_DATA htonl(0xffffffff) #define MYRI10GE_NO_RESPONSE_RESULT 0xffffffff @@ -151,6 +156,8 @@ struct myri10ge_rx_done { dma_addr_t bus; int cnt; int idx; + struct net_lro_mgr lro_mgr; + struct net_lro_desc lro_desc[MYRI10GE_MAX_LRO_DESCRIPTORS]; }; struct myri10ge_priv { @@ -163,6 +170,7 @@ struct myri10ge_priv { int small_bytes; int big_bytes; struct net_device *dev; + struct napi_struct napi; struct net_device_stats stats; u8 __iomem *sram; int sram_size; @@ -277,6 +285,14 @@ static int myri10ge_debug = -1; /* defau module_param(myri10ge_debug, int, 0); MODULE_PARM_DESC(myri10ge_debug, "Debug level (0=none,...,16=all)"); +static int myri10ge_lro = 1; +module_param(myri10ge_lro, int, S_IRUGO); +MODULE_PARM_DESC(myri10ge_lro, "Enable large receive offload\n"); + +static int myri10ge_lro_max_pkts = MYRI10GE_LRO_MAX_PKTS; +module_param(myri10ge_lro_max_pkts, int, S_IRUGO); +MODULE_PARM_DESC(myri10ge_lro_max_pkts, "Number of LRO packets to be aggregated\n"); + static int myri10ge_fill_thresh = 256; module_param(myri10ge_fill_thresh, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(myri10ge_fill_thresh, "Number of empty rx slots allowed\n"); @@ -1020,6 +1036,15 @@ myri10ge_rx_done(struct myri10ge_priv *m remainder -= MYRI10GE_ALLOC_SIZE; } + if (mgp->csum_flag && myri10ge_lro) { + rx_frags[0].page_offset += MXGEFW_PAD; + rx_frags[0].size -= MXGEFW_PAD; + len -= MXGEFW_PAD; + lro_receive_frags(&mgp->rx_done.lro_mgr, rx_frags, + len, len, (void *)(unsigned long)csum, csum); + return 1; + } + hlen = MYRI10GE_HLEN > len ? len : MYRI10GE_HLEN; /* allocate an skb to attach the page(s) to.
*/ @@ -1100,7 +1125,7 @@ static inline void myri10ge_tx_done(stru } } -static inline void myri10ge_clean_rx_done(struct myri10ge_priv *mgp, int *limit) +static inline int myri10ge_clean_rx_done(struct myri10ge_priv *mgp, int budget) { struct myri10ge_rx_done *rx_done = &mgp->rx_done; unsigned long rx_bytes = 0; @@ -1109,10 +1134,11 @@ static inline void myri10ge_clean_rx_don int idx = rx_done->idx; int cnt = rx_done->cnt; + int work_done = 0; u16 length; __wsum checksum; - while (rx_done->entry[idx].length != 0 && *limit != 0) { + while (rx_done->entry[idx].length != 0 && work_done++ < budget) { length = ntohs(rx_done->entry[idx].length); rx_done->entry[idx].length = 0; checksum = csum_unfold(rx_done->entry[idx].checksum); @@ -1128,16 +1154,15 @@ static inline void myri10ge_clean_rx_don rx_bytes += rx_ok * (unsigned long)length; cnt++; idx = cnt & (myri10ge_max_intr_slots - 1); - - /* limit potential for livelock by only handling a - * limited number of frames. */ - (*limit)--; } rx_done->idx = idx; rx_done->cnt = cnt; mgp->stats.rx_packets += rx_packets; mgp->stats.rx_bytes += rx_bytes; + if (myri10ge_lro) + lro_flush_all(&rx_done->lro_mgr); + /* restock receive rings if needed */ if (mgp->rx_small.fill_cnt - mgp->rx_small.cnt < myri10ge_fill_thresh) myri10ge_alloc_rx_pages(mgp, &mgp->rx_small, @@ -1145,6 +1170,7 @@ static inline void myri10ge_clean_rx_don if (mgp->rx_big.fill_cnt - mgp->rx_big.cnt < myri10ge_fill_thresh) myri10ge_alloc_rx_pages(mgp, &mgp->rx_big, mgp->big_bytes, 0); + return work_done; } static inline void myri10ge_check_statblock(struct myri10ge_priv *mgp) @@ -1189,26 +1215,21 @@ static inline void myri10ge_check_statbl } } -static int myri10ge_poll(struct net_device *netdev, int *budget) +static int myri10ge_poll(struct napi_struct *napi, int budget) { - struct myri10ge_priv *mgp = netdev_priv(netdev); + struct myri10ge_priv *mgp = container_of(napi, struct myri10ge_priv, napi); + struct net_device *netdev = mgp->dev; struct myri10ge_rx_done 
*rx_done = &mgp->rx_done; - int limit, orig_limit, work_done; + int work_done; /* process as many rx events as NAPI will allow */ - limit = min(*budget, netdev->quota); - orig_limit = limit; - myri10ge_clean_rx_done(mgp, &limit); - work_done = orig_limit - limit; - *budget -= work_done; - netdev->quota -= work_done; + work_done = myri10ge_clean_rx_done(mgp, budget); if (rx_done->entry[rx_done->idx].length == 0 || !netif_running(netdev)) { - netif_rx_complete(netdev); + netif_rx_complete(netdev, napi); put_be32(htonl(3), mgp->irq_claim); - return 0; } - return 1; + return work_done; } static irqreturn_t myri10ge_intr(int irq, void *arg) @@ -1226,7 +1247,7 @@ static irqreturn_t myri10ge_intr(int irq /* low bit indicates receives are present, so schedule * napi poll handler */ if (stats->valid & 1) - netif_rx_schedule(mgp->dev); + netif_rx_schedule(mgp->dev, &mgp->napi); if (!mgp->msi_enabled) { put_be32(0, mgp->irq_deassert); @@ -1379,7 +1400,8 @@ static const char myri10ge_gstrings_stat "dropped_pause", "dropped_bad_phy", "dropped_bad_crc32", "dropped_unicast_filtered", "dropped_multicast_filtered", "dropped_runt", "dropped_overrun", "dropped_no_small_buffer", - "dropped_no_big_buffer" + "dropped_no_big_buffer", "LRO aggregated", "LRO flushed", + "LRO avg aggr", "LRO no_desc" }; #define MYRI10GE_NET_STATS_LEN 21 @@ -1445,6 +1467,14 @@ myri10ge_get_ethtool_stats(struct net_de data[i++] = (unsigned int)ntohl(mgp->fw_stats->dropped_overrun); data[i++] = (unsigned int)ntohl(mgp->fw_stats->dropped_no_small_buffer); data[i++] = (unsigned int)ntohl(mgp->fw_stats->dropped_no_big_buffer); + data[i++] = mgp->rx_done.lro_mgr.stats.aggregated; + data[i++] = mgp->rx_done.lro_mgr.stats.flushed; + if (mgp->rx_done.lro_mgr.stats.flushed) + data[i++] = mgp->rx_done.lro_mgr.stats.aggregated / + mgp->rx_done.lro_mgr.stats.flushed; + else + data[i++] = 0; + data[i++] = mgp->rx_done.lro_mgr.stats.no_desc; } static void myri10ge_set_msglevel(struct net_device *netdev, u32 value) @@ 
-1718,10 +1748,69 @@ static void myri10ge_free_irq(struct myr pci_disable_msi(pdev); } +static int +myri10ge_get_frag_header(struct skb_frag_struct *frag, void **mac_hdr, + void **ip_hdr, void **tcpudp_hdr, + u64 * hdr_flags, void *priv) +{ + struct ethhdr *eh; + struct vlan_ethhdr *veh; + struct iphdr *iph; + u8 *va = page_address(frag->page) + frag->page_offset; + unsigned long ll_hlen; + __wsum csum = (__wsum) (unsigned long)priv; + + /* find the mac header, aborting if not IPv4 */ + + eh = (struct ethhdr *)va; + *mac_hdr = eh; + ll_hlen = ETH_HLEN; + if (eh->h_proto != htons(ETH_P_IP)) { + if (eh->h_proto == htons(ETH_P_8021Q)) { + veh = (struct vlan_ethhdr *)va; + if (veh->h_vlan_encapsulated_proto != htons(ETH_P_IP)) + return -1; + + ll_hlen += VLAN_HLEN; + + /* + * HW checksum starts ETH_HLEN bytes into + * frame, so we must subtract off the VLAN + * header's checksum before csum can be used + */ + csum = csum_sub(csum, csum_partial(va + ETH_HLEN, + VLAN_HLEN, 0)); + } else { + return -1; + } + } + *hdr_flags = LRO_IPV4; + + iph = (struct iphdr *)(va + ll_hlen); + *ip_hdr = iph; + if (iph->protocol != IPPROTO_TCP) + return -1; + *hdr_flags |= LRO_TCP; + *tcpudp_hdr = (u8 *) (*ip_hdr) + (iph->ihl << 2); + + /* verify the IP checksum */ + if (unlikely(ip_fast_csum((u8 *) iph, iph->ihl))) + return -1; + + /* verify the checksum */ + if (unlikely(csum_tcpudp_magic(iph->saddr, iph->daddr, + ntohs(iph->tot_len) - (iph->ihl << 2), + IPPROTO_TCP, csum))) + return -1; + + return 0; +} + static int myri10ge_open(struct net_device *dev) { struct myri10ge_priv *mgp; struct myri10ge_cmd cmd; + struct net_lro_mgr *lro_mgr; int status, big_pow2; mgp = netdev_priv(dev); @@ -1853,7 +1942,19 @@ static int myri10ge_open(struct net_devi mgp->link_state = htonl(~0U); mgp->rdma_tags_available = 15; - netif_poll_enable(mgp->dev); /* must happen prior to any irq */ + lro_mgr = &mgp->rx_done.lro_mgr; + lro_mgr->dev = dev; + lro_mgr->features = LRO_F_NAPI; + lro_mgr->ip_summed = 
CHECKSUM_COMPLETE; + lro_mgr->ip_summed_aggr = CHECKSUM_UNNECESSARY; + lro_mgr->max_desc = MYRI10GE_MAX_LRO_DESCRIPTORS; + lro_mgr->lro_arr = mgp->rx_done.lro_desc; + lro_mgr->get_frag_header = myri10ge_get_frag_header; + lro_mgr->max_aggr = myri10ge_lro_max_pkts; + if (lro_mgr->max_aggr > MAX_SKB_FRAGS) + lro_mgr->max_aggr = MAX_SKB_FRAGS; + + napi_enable(&mgp->napi); /* must happen prior to any irq */ status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_UP, &cmd, 0); if (status) { @@ -1897,7 +1998,7 @@ static int myri10ge_close(struct net_dev del_timer_sync(&mgp->watchdog_timer); mgp->running = MYRI10GE_ETH_STOPPING; - netif_poll_disable(mgp->dev); + napi_disable(&mgp->napi); netif_carrier_off(dev); netif_stop_queue(dev); old_down_cnt = mgp->down_cnt; @@ -2856,6 +2957,8 @@ static int myri10ge_probe(struct pci_dev mgp = netdev_priv(netdev); mgp->dev = netdev; + netif_napi_add(netdev, &mgp->napi, + myri10ge_poll, myri10ge_napi_weight); mgp->pdev = pdev; mgp->csum_flag = MXGEFW_FLAGS_CKSUM; mgp->pause = myri10ge_flow_control; @@ -2980,8 +3083,6 @@ static int myri10ge_probe(struct pci_dev netdev->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO; if (dac_enabled) netdev->features |= NETIF_F_HIGHDMA; - netdev->poll = myri10ge_poll; - netdev->weight = myri10ge_napi_weight; /* make sure we can get an irq, and that MSI can be * setup (if available). 
Also ensure netdev->irq diff -puN drivers/net/natsemi.c~git-net drivers/net/natsemi.c --- a/drivers/net/natsemi.c~git-net +++ a/drivers/net/natsemi.c @@ -560,6 +560,8 @@ struct netdev_private { /* address of a sent-in-place packet/buffer, for later free() */ struct sk_buff *tx_skbuff[TX_RING_SIZE]; dma_addr_t tx_dma[TX_RING_SIZE]; + struct net_device *dev; + struct napi_struct napi; struct net_device_stats stats; /* Media monitoring timer */ struct timer_list timer; @@ -636,7 +638,7 @@ static void init_registers(struct net_de static int start_tx(struct sk_buff *skb, struct net_device *dev); static irqreturn_t intr_handler(int irq, void *dev_instance); static void netdev_error(struct net_device *dev, int intr_status); -static int natsemi_poll(struct net_device *dev, int *budget); +static int natsemi_poll(struct napi_struct *napi, int budget); static void netdev_rx(struct net_device *dev, int *work_done, int work_to_do); static void netdev_tx_done(struct net_device *dev); static int natsemi_change_mtu(struct net_device *dev, int new_mtu); @@ -861,6 +863,7 @@ static int __devinit natsemi_probe1 (str dev->irq = irq; np = netdev_priv(dev); + netif_napi_add(dev, &np->napi, natsemi_poll, 64); np->pci_dev = pdev; pci_set_drvdata(pdev, dev); @@ -931,8 +934,6 @@ static int __devinit natsemi_probe1 (str dev->do_ioctl = &netdev_ioctl; dev->tx_timeout = &tx_timeout; dev->watchdog_timeo = TX_TIMEOUT; - dev->poll = natsemi_poll; - dev->weight = 64; #ifdef CONFIG_NET_POLL_CONTROLLER dev->poll_controller = &natsemi_poll_controller; @@ -1554,6 +1555,8 @@ static int netdev_open(struct net_device free_irq(dev->irq, dev); return i; } + napi_enable(&np->napi); + init_ring(dev); spin_lock_irq(&np->lock); init_registers(dev); @@ -2200,10 +2203,10 @@ static irqreturn_t intr_handler(int irq, prefetch(&np->rx_skbuff[np->cur_rx % RX_RING_SIZE]); - if (netif_rx_schedule_prep(dev)) { + if (netif_rx_schedule_prep(dev, &np->napi)) { /* Disable interrupts and register for poll */ 
natsemi_irq_disable(dev); - __netif_rx_schedule(dev); + __netif_rx_schedule(dev, &np->napi); } else printk(KERN_WARNING "%s: Ignoring interrupt, status %#08x, mask %#08x.\n", @@ -2216,12 +2219,11 @@ static irqreturn_t intr_handler(int irq, /* This is the NAPI poll routine. As well as the standard RX handling * it also handles all other interrupts that the chip might raise. */ -static int natsemi_poll(struct net_device *dev, int *budget) +static int natsemi_poll(struct napi_struct *napi, int budget) { - struct netdev_private *np = netdev_priv(dev); + struct netdev_private *np = container_of(napi, struct netdev_private, napi); + struct net_device *dev = np->dev; void __iomem * ioaddr = ns_ioaddr(dev); - - int work_to_do = min(*budget, dev->quota); int work_done = 0; do { @@ -2236,7 +2238,7 @@ static int natsemi_poll(struct net_devic if (np->intr_status & (IntrRxDone | IntrRxIntr | RxStatusFIFOOver | IntrRxErr | IntrRxOverrun)) { - netdev_rx(dev, &work_done, work_to_do); + netdev_rx(dev, &work_done, budget); } if (np->intr_status & @@ -2250,16 +2252,13 @@ static int natsemi_poll(struct net_devic if (np->intr_status & IntrAbnormalSummary) netdev_error(dev, np->intr_status); - *budget -= work_done; - dev->quota -= work_done; - - if (work_done >= work_to_do) - return 1; + if (work_done >= budget) + return work_done; np->intr_status = readl(ioaddr + IntrStatus); } while (np->intr_status); - netif_rx_complete(dev); + netif_rx_complete(dev, napi); /* Reenable interrupts providing nothing is trying to shut * the chip down. */ @@ -2268,7 +2267,7 @@ static int natsemi_poll(struct net_devic natsemi_irq_enable(dev); spin_unlock(&np->lock); - return 0; + return work_done; } /* This routine is logically part of the interrupt handler, but separated @@ -3158,6 +3157,8 @@ static int netdev_close(struct net_devic dev->name, np->cur_tx, np->dirty_tx, np->cur_rx, np->dirty_rx); + napi_disable(&np->napi); + /* * FIXME: what if someone tries to close a device * that is suspended? 
@@ -3253,7 +3254,7 @@ static void __devexit natsemi_remove1 (s * disable_irq() to enforce synchronization. * * natsemi_poll: checks before reenabling interrupts. suspend * sets hands_off, disables interrupts and then waits with - * netif_poll_disable(). + * napi_disable(). * * Interrupts must be disabled, otherwise hands_off can cause irq storms. */ @@ -3279,7 +3280,7 @@ static int natsemi_suspend (struct pci_d spin_unlock_irq(&np->lock); enable_irq(dev->irq); - netif_poll_disable(dev); + napi_disable(&np->napi); /* Update the error counts. */ __get_stats(dev); @@ -3320,6 +3321,8 @@ static int natsemi_resume (struct pci_de pci_enable_device(pdev); /* pci_power_on(pdev); */ + napi_enable(&np->napi); + natsemi_reset(dev); init_ring(dev); disable_irq(dev->irq); @@ -3333,7 +3336,6 @@ static int natsemi_resume (struct pci_de mod_timer(&np->timer, jiffies + 1*HZ); } netif_device_attach(dev); - netif_poll_enable(dev); out: rtnl_unlock(); return 0; diff -puN drivers/net/netconsole.c~git-net drivers/net/netconsole.c --- a/drivers/net/netconsole.c~git-net +++ a/drivers/net/netconsole.c @@ -35,95 +35,782 @@ ****************************************************************/ #include -#include #include #include #include -#include #include #include -#include -#include #include +#include +#include MODULE_AUTHOR("Maintainer: Matt Mackall "); MODULE_DESCRIPTION("Console driver for network interfaces"); MODULE_LICENSE("GPL"); -static char config[256]; -module_param_string(netconsole, config, 256, 0); +#define MAX_PARAM_LENGTH 256 +#define MAX_PRINT_CHUNK 1000 + +static char config[MAX_PARAM_LENGTH]; +module_param_string(netconsole, config, MAX_PARAM_LENGTH, 0); MODULE_PARM_DESC(netconsole, " netconsole=[src-port]@[src-ip]/[dev],[tgt-port]@/[tgt-macaddr]\n"); -static struct netpoll np = { - .name = "netconsole", - .dev_name = "eth0", - .local_port = 6665, - .remote_port = 6666, - .remote_mac = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, +#ifndef MODULE +static int __init option_setup(char 
*opt) +{ + strlcpy(config, opt, MAX_PARAM_LENGTH); + return 1; +} +__setup("netconsole=", option_setup); +#endif /* MODULE */ + +/* Linked list of all configured targets */ +static LIST_HEAD(target_list); + +/* This needs to be a spinlock because write_msg() cannot sleep */ +static DEFINE_SPINLOCK(target_list_lock); + +/** + * struct netconsole_target - Represents a configured netconsole target. + * @list: Links this target into the target_list. + * @item: Links us into the configfs subsystem hierarchy. + * @enabled: On / off knob to enable / disable target. + * Visible from userspace (read-write). + * We maintain a strict 1:1 correspondence between this and + * whether the corresponding netpoll is active or inactive. + * Also, other parameters of a target may be modified at + * runtime only when it is disabled (enabled == 0). + * @np: The netpoll structure for this target. + * Contains the other userspace visible parameters: + * dev_name (read-write) + * local_port (read-write) + * remote_port (read-write) + * local_ip (read-write) + * remote_ip (read-write) + * local_mac (read-only) + * remote_mac (read-write) + */ +struct netconsole_target { + struct list_head list; +#ifdef CONFIG_NETCONSOLE_DYNAMIC + struct config_item item; +#endif + int enabled; + struct netpoll np; }; -static int configured = 0; -#define MAX_PRINT_CHUNK 1000 +#ifdef CONFIG_NETCONSOLE_DYNAMIC -static void write_msg(struct console *con, const char *msg, unsigned int len) +static struct configfs_subsystem netconsole_subsys; + +static int __init dynamic_netconsole_init(void) { - int frag, left; - unsigned long flags; + config_group_init(&netconsole_subsys.su_group); + mutex_init(&netconsole_subsys.su_mutex); + return configfs_register_subsystem(&netconsole_subsys); +} - if (!np.dev) - return; +static void __exit dynamic_netconsole_exit(void) +{ + configfs_unregister_subsystem(&netconsole_subsys); +} + +/* + * Targets that were created by parsing the boot/module option string + * do not exist in 
the configfs hierarchy (and have NULL names) and will + * never go away, so make these a no-op for them. + */ +static void netconsole_target_get(struct netconsole_target *nt) +{ + if (config_item_name(&nt->item)) + config_item_get(&nt->item); +} + +static void netconsole_target_put(struct netconsole_target *nt) +{ + if (config_item_name(&nt->item)) + config_item_put(&nt->item); +} - local_irq_save(flags); +#else /* !CONFIG_NETCONSOLE_DYNAMIC */ - for(left = len; left; ) { - frag = min(left, MAX_PRINT_CHUNK); - netpoll_send_udp(&np, msg, frag); - msg += frag; - left -= frag; +static int __init dynamic_netconsole_init(void) +{ + return 0; +} + +static void __exit dynamic_netconsole_exit(void) +{ +} + +/* + * No danger of targets going away from under us when dynamic + * reconfigurability is off. + */ +static void netconsole_target_get(struct netconsole_target *nt) +{ +} + +static void netconsole_target_put(struct netconsole_target *nt) +{ +} + +#endif /* CONFIG_NETCONSOLE_DYNAMIC */ + +/* Allocate new target (from boot/module param) and setup netpoll for it */ +static struct netconsole_target *alloc_param_target(char *target_config) +{ + int err = -ENOMEM; + struct netconsole_target *nt; + + /* + * Allocate and initialize with defaults. + * Note that these targets get their config_item fields zeroed-out. 
+ */ + nt = kzalloc(sizeof(*nt), GFP_KERNEL); + if (!nt) { + printk(KERN_ERR "netconsole: failed to allocate memory\n"); + goto fail; } - local_irq_restore(flags); + nt->np.name = "netconsole"; + strlcpy(nt->np.dev_name, "eth0", IFNAMSIZ); + nt->np.local_port = 6665; + nt->np.remote_port = 6666; + memset(nt->np.remote_mac, 0xff, ETH_ALEN); + + /* Parse parameters and setup netpoll */ + err = netpoll_parse_options(&nt->np, target_config); + if (err) + goto fail; + + err = netpoll_setup(&nt->np); + if (err) + goto fail; + + nt->enabled = 1; + + return nt; + +fail: + kfree(nt); + return ERR_PTR(err); } -static struct console netconsole = { - .name = "netcon", - .flags = CON_ENABLED | CON_PRINTBUFFER, - .write = write_msg +/* Cleanup netpoll for given target (from boot/module param) and free it */ +static void free_param_target(struct netconsole_target *nt) +{ + netpoll_cleanup(&nt->np); + kfree(nt); +} + +#ifdef CONFIG_NETCONSOLE_DYNAMIC + +/* + * Our subsystem hierarchy is: + * + * /sys/kernel/config/netconsole/ + * | + * / + * | enabled + * | dev_name + * | local_port + * | remote_port + * | local_ip + * | remote_ip + * | local_mac + * | remote_mac + * | + * /... + */ + +struct netconsole_target_attr { + struct configfs_attribute attr; + ssize_t (*show)(struct netconsole_target *nt, + char *buf); + ssize_t (*store)(struct netconsole_target *nt, + const char *buf, + size_t count); }; -static int option_setup(char *opt) +static struct netconsole_target *to_target(struct config_item *item) { - configured = !netpoll_parse_options(&np, opt); - return 1; + return item ? + container_of(item, struct netconsole_target, item) : + NULL; } -__setup("netconsole=", option_setup); +/* + * Wrapper over simple_strtol (base 10) with sanity and range checking. + * We return (signed) long only because we may want to return errors. + * Do not use this to convert numbers that are allowed to be negative. 
+ */ +static long strtol10_check_range(const char *cp, long min, long max) +{ + long ret; + char *p = (char *) cp; + + WARN_ON(min < 0); + WARN_ON(max < min); + + ret = simple_strtol(p, &p, 10); -static int init_netconsole(void) + if (*p && (*p != '\n')) { + printk(KERN_ERR "netconsole: invalid input\n"); + return -EINVAL; + } + if ((ret < min) || (ret > max)) { + printk(KERN_ERR "netconsole: input %ld must be between " + "%ld and %ld\n", ret, min, max); + return -EINVAL; + } + + return ret; +} + +/* + * Attribute operations for netconsole_target. + */ + +static ssize_t show_enabled(struct netconsole_target *nt, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", nt->enabled); +} + +static ssize_t show_dev_name(struct netconsole_target *nt, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%s\n", nt->np.dev_name); +} + +static ssize_t show_local_port(struct netconsole_target *nt, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", nt->np.local_port); +} + +static ssize_t show_remote_port(struct netconsole_target *nt, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", nt->np.remote_port); +} + +static ssize_t show_local_ip(struct netconsole_target *nt, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d.%d.%d.%d\n", + HIPQUAD(nt->np.local_ip)); +} + +static ssize_t show_remote_ip(struct netconsole_target *nt, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d.%d.%d.%d\n", + HIPQUAD(nt->np.remote_ip)); +} + +static ssize_t show_local_mac(struct netconsole_target *nt, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%02x:%02x:%02x:%02x:%02x:%02x\n", + nt->np.local_mac[0], nt->np.local_mac[1], + nt->np.local_mac[2], nt->np.local_mac[3], + nt->np.local_mac[4], nt->np.local_mac[5]); +} + +static ssize_t show_remote_mac(struct netconsole_target *nt, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%02x:%02x:%02x:%02x:%02x:%02x\n", + nt->np.remote_mac[0], nt->np.remote_mac[1], + nt->np.remote_mac[2], nt->np.remote_mac[3], + nt->np.remote_mac[4], 
nt->np.remote_mac[5]); +} + +/* + * This one is special -- targets created through the configfs interface + * are not enabled (and the corresponding netpoll activated) by default. + * The user is expected to set the desired parameters first (which + * would enable him to dynamically add new netpoll targets for new + * network interfaces as and when they come up). + */ +static ssize_t store_enabled(struct netconsole_target *nt, + const char *buf, + size_t count) { int err; + long enabled; + + enabled = strtol10_check_range(buf, 0, 1); + if (enabled < 0) + return enabled; + + if (enabled) { /* 1 */ + + /* + * Skip netpoll_parse_options() -- all the attributes are + * already configured via configfs. Just print them out. + */ + netpoll_print_options(&nt->np); + + err = netpoll_setup(&nt->np); + if (err) + return err; + + printk(KERN_INFO "netconsole: network logging started\n"); + + } else { /* 0 */ + netpoll_cleanup(&nt->np); + } + + nt->enabled = enabled; + + return strnlen(buf, count); +} + +static ssize_t store_dev_name(struct netconsole_target *nt, + const char *buf, + size_t count) +{ + size_t len; + + if (nt->enabled) { + printk(KERN_ERR "netconsole: target (%s) is enabled, " + "disable to update parameters\n", + config_item_name(&nt->item)); + return -EINVAL; + } + + strlcpy(nt->np.dev_name, buf, IFNAMSIZ); + + /* Get rid of possible trailing newline from echo(1) */ + len = strnlen(nt->np.dev_name, IFNAMSIZ); + if (nt->np.dev_name[len - 1] == '\n') + nt->np.dev_name[len - 1] = '\0'; + + return strnlen(buf, count); +} + +static ssize_t store_local_port(struct netconsole_target *nt, + const char *buf, + size_t count) +{ + long local_port; +#define __U16_MAX ((__u16) ~0U) + + if (nt->enabled) { + printk(KERN_ERR "netconsole: target (%s) is enabled, " + "disable to update parameters\n", + config_item_name(&nt->item)); + return -EINVAL; + } + + local_port = strtol10_check_range(buf, 0, __U16_MAX); + if (local_port < 0) + return local_port; + + nt->np.local_port = 
local_port; + + return strnlen(buf, count); +} + +static ssize_t store_remote_port(struct netconsole_target *nt, + const char *buf, + size_t count) +{ + long remote_port; +#define __U16_MAX ((__u16) ~0U) + + if (nt->enabled) { + printk(KERN_ERR "netconsole: target (%s) is enabled, " + "disable to update parameters\n", + config_item_name(&nt->item)); + return -EINVAL; + } + + remote_port = strtol10_check_range(buf, 0, __U16_MAX); + if (remote_port < 0) + return remote_port; + + nt->np.remote_port = remote_port; + + return strnlen(buf, count); +} + +static ssize_t store_local_ip(struct netconsole_target *nt, + const char *buf, + size_t count) +{ + if (nt->enabled) { + printk(KERN_ERR "netconsole: target (%s) is enabled, " + "disable to update parameters\n", + config_item_name(&nt->item)); + return -EINVAL; + } + + nt->np.local_ip = ntohl(in_aton(buf)); + + return strnlen(buf, count); +} + +static ssize_t store_remote_ip(struct netconsole_target *nt, + const char *buf, + size_t count) +{ + if (nt->enabled) { + printk(KERN_ERR "netconsole: target (%s) is enabled, " + "disable to update parameters\n", + config_item_name(&nt->item)); + return -EINVAL; + } + + nt->np.remote_ip = ntohl(in_aton(buf)); + + return strnlen(buf, count); +} + +static ssize_t store_remote_mac(struct netconsole_target *nt, + const char *buf, + size_t count) +{ + u8 remote_mac[ETH_ALEN]; + char *p = (char *) buf; + int i; + + if (nt->enabled) { + printk(KERN_ERR "netconsole: target (%s) is enabled, " + "disable to update parameters\n", + config_item_name(&nt->item)); + return -EINVAL; + } + + for (i = 0; i < ETH_ALEN - 1; i++) { + remote_mac[i] = simple_strtoul(p, &p, 16); + if (*p != ':') + goto invalid; + p++; + } + remote_mac[ETH_ALEN - 1] = simple_strtoul(p, &p, 16); + if (*p && (*p != '\n')) + goto invalid; + + memcpy(nt->np.remote_mac, remote_mac, ETH_ALEN); + + return strnlen(buf, count); + +invalid: + printk(KERN_ERR "netconsole: invalid input\n"); + return -EINVAL; +} + +/* + * Attribute 
definitions for netconsole_target. + */ + +#define NETCONSOLE_TARGET_ATTR_RO(_name) \ +static struct netconsole_target_attr netconsole_target_##_name = \ + __CONFIGFS_ATTR(_name, S_IRUGO, show_##_name, NULL) + +#define NETCONSOLE_TARGET_ATTR_RW(_name) \ +static struct netconsole_target_attr netconsole_target_##_name = \ + __CONFIGFS_ATTR(_name, S_IRUGO | S_IWUSR, show_##_name, store_##_name) + +NETCONSOLE_TARGET_ATTR_RW(enabled); +NETCONSOLE_TARGET_ATTR_RW(dev_name); +NETCONSOLE_TARGET_ATTR_RW(local_port); +NETCONSOLE_TARGET_ATTR_RW(remote_port); +NETCONSOLE_TARGET_ATTR_RW(local_ip); +NETCONSOLE_TARGET_ATTR_RW(remote_ip); +NETCONSOLE_TARGET_ATTR_RO(local_mac); +NETCONSOLE_TARGET_ATTR_RW(remote_mac); + +static struct configfs_attribute *netconsole_target_attrs[] = { + &netconsole_target_enabled.attr, + &netconsole_target_dev_name.attr, + &netconsole_target_local_port.attr, + &netconsole_target_remote_port.attr, + &netconsole_target_local_ip.attr, + &netconsole_target_remote_ip.attr, + &netconsole_target_local_mac.attr, + &netconsole_target_remote_mac.attr, + NULL, +}; + +/* + * Item operations and type for netconsole_target. 
+ */ + +static void netconsole_target_release(struct config_item *item) +{ + kfree(to_target(item)); +} + +static ssize_t netconsole_target_attr_show(struct config_item *item, + struct configfs_attribute *attr, + char *buf) +{ + ssize_t ret = -EINVAL; + struct netconsole_target *nt = to_target(item); + struct netconsole_target_attr *na = + container_of(attr, struct netconsole_target_attr, attr); + + if (na->show) + ret = na->show(nt, buf); + + return ret; +} + +static ssize_t netconsole_target_attr_store(struct config_item *item, + struct configfs_attribute *attr, + const char *buf, + size_t count) +{ + ssize_t ret = -EINVAL; + struct netconsole_target *nt = to_target(item); + struct netconsole_target_attr *na = + container_of(attr, struct netconsole_target_attr, attr); + + if (na->store) + ret = na->store(nt, buf, count); + + return ret; +} + +static struct configfs_item_operations netconsole_target_item_ops = { + .release = netconsole_target_release, + .show_attribute = netconsole_target_attr_show, + .store_attribute = netconsole_target_attr_store, +}; + +static struct config_item_type netconsole_target_type = { + .ct_attrs = netconsole_target_attrs, + .ct_item_ops = &netconsole_target_item_ops, + .ct_owner = THIS_MODULE, +}; + +/* + * Group operations and type for netconsole_subsys. + */ + +static struct config_item *make_netconsole_target(struct config_group *group, + const char *name) +{ + unsigned long flags; + struct netconsole_target *nt; + + /* + * Allocate and initialize with defaults. + * Target is disabled at creation (enabled == 0). 
+ */ + nt = kzalloc(sizeof(*nt), GFP_KERNEL); + if (!nt) { + printk(KERN_ERR "netconsole: failed to allocate memory\n"); + return NULL; + } + + nt->np.name = "netconsole"; + strlcpy(nt->np.dev_name, "eth0", IFNAMSIZ); + nt->np.local_port = 6665; + nt->np.remote_port = 6666; + memset(nt->np.remote_mac, 0xff, ETH_ALEN); + + /* Initialize the config_item member */ + config_item_init_type_name(&nt->item, name, &netconsole_target_type); + + /* Adding, but it is disabled */ + spin_lock_irqsave(&target_list_lock, flags); + list_add(&nt->list, &target_list); + spin_unlock_irqrestore(&target_list_lock, flags); + + return &nt->item; +} + +static void drop_netconsole_target(struct config_group *group, + struct config_item *item) +{ + unsigned long flags; + struct netconsole_target *nt = to_target(item); + + spin_lock_irqsave(&target_list_lock, flags); + list_del(&nt->list); + spin_unlock_irqrestore(&target_list_lock, flags); + + /* + * The target may have never been enabled, or was manually disabled + * before being removed so netpoll may have already been cleaned up. 
+ */ + if (nt->enabled) + netpoll_cleanup(&nt->np); + + config_item_put(&nt->item); +} + +static struct configfs_group_operations netconsole_subsys_group_ops = { + .make_item = make_netconsole_target, + .drop_item = drop_netconsole_target, +}; + +static struct config_item_type netconsole_subsys_type = { + .ct_group_ops = &netconsole_subsys_group_ops, + .ct_owner = THIS_MODULE, +}; - if(strlen(config)) - option_setup(config); +/* The netconsole configfs subsystem */ +static struct configfs_subsystem netconsole_subsys = { + .su_group = { + .cg_item = { + .ci_namebuf = "netconsole", + .ci_type = &netconsole_subsys_type, + }, + }, +}; + +#endif /* CONFIG_NETCONSOLE_DYNAMIC */ - if(!configured) { - printk("netconsole: not configured, aborting\n"); - return 0; +/* Handle network interface device notifications */ +static int netconsole_netdev_event(struct notifier_block *this, + unsigned long event, + void *ptr) +{ + unsigned long flags; + struct netconsole_target *nt; + struct net_device *dev = ptr; + + if (!(event == NETDEV_CHANGEADDR || event == NETDEV_CHANGENAME)) + goto done; + + spin_lock_irqsave(&target_list_lock, flags); + list_for_each_entry(nt, &target_list, list) { + netconsole_target_get(nt); + if (nt->np.dev == dev) { + switch (event) { + case NETDEV_CHANGEADDR: + memcpy(nt->np.local_mac, dev->dev_addr, ETH_ALEN); + break; + + case NETDEV_CHANGENAME: + strlcpy(nt->np.dev_name, dev->name, IFNAMSIZ); + break; + } + } + netconsole_target_put(nt); } + spin_unlock_irqrestore(&target_list_lock, flags); + +done: + return NOTIFY_DONE; +} + +static struct notifier_block netconsole_netdev_notifier = { + .notifier_call = netconsole_netdev_event, +}; - err = netpoll_setup(&np); +static void write_msg(struct console *con, const char *msg, unsigned int len) +{ + int frag, left; + unsigned long flags; + struct netconsole_target *nt; + const char *tmp; + + /* Avoid taking lock and disabling interrupts unnecessarily */ + if (list_empty(&target_list)) + return; + + 
spin_lock_irqsave(&target_list_lock, flags); + list_for_each_entry(nt, &target_list, list) { + netconsole_target_get(nt); + if (nt->enabled && netif_running(nt->np.dev)) { + /* + * We nest this inside the for-each-target loop above + * so that we're able to get as much logging out to + * at least one target if we die inside here, instead + * of unnecessarily keeping all targets in lock-step. + */ + tmp = msg; + for (left = len; left;) { + frag = min(left, MAX_PRINT_CHUNK); + netpoll_send_udp(&nt->np, tmp, frag); + tmp += frag; + left -= frag; + } + } + netconsole_target_put(nt); + } + spin_unlock_irqrestore(&target_list_lock, flags); +} + +static struct console netconsole = { + .name = "netcon", + .flags = CON_ENABLED | CON_PRINTBUFFER, + .write = write_msg, +}; + +static int __init init_netconsole(void) +{ + int err; + struct netconsole_target *nt, *tmp; + unsigned long flags; + char *target_config; + char *input = config; + + if (strnlen(input, MAX_PARAM_LENGTH)) { + while ((target_config = strsep(&input, ";"))) { + nt = alloc_param_target(target_config); + if (IS_ERR(nt)) { + err = PTR_ERR(nt); + goto fail; + } + spin_lock_irqsave(&target_list_lock, flags); + list_add(&nt->list, &target_list); + spin_unlock_irqrestore(&target_list_lock, flags); + } + } + + err = register_netdevice_notifier(&netconsole_netdev_notifier); + if (err) + goto fail; + + err = dynamic_netconsole_init(); if (err) - return err; + goto undonotifier; register_console(&netconsole); printk(KERN_INFO "netconsole: network logging started\n"); - return 0; + + return err; + +undonotifier: + unregister_netdevice_notifier(&netconsole_netdev_notifier); + +fail: + printk(KERN_ERR "netconsole: cleaning up\n"); + + /* + * Remove all targets and destroy them (only targets created + * from the boot/module option exist here). Skipping the list + * lock is safe here, and netpoll_cleanup() will sleep. 
+ */ + list_for_each_entry_safe(nt, tmp, &target_list, list) { + list_del(&nt->list); + free_param_target(nt); + } + + return err; } -static void cleanup_netconsole(void) +static void __exit cleanup_netconsole(void) { + struct netconsole_target *nt, *tmp; + unregister_console(&netconsole); - netpoll_cleanup(&np); + dynamic_netconsole_exit(); + unregister_netdevice_notifier(&netconsole_netdev_notifier); + + /* + * Targets created via configfs pin references on our module + * and would first be rmdir(2)'ed from userspace. We reach + * here only when they are already destroyed, and only those + * created from the boot/module option are left, so remove and + * destroy them. Skipping the list lock is safe here, and + * netpoll_cleanup() will sleep. + */ + list_for_each_entry_safe(nt, tmp, &target_list, list) { + list_del(&nt->list); + free_param_target(nt); + } } module_init(init_netconsole); diff -puN drivers/net/netxen/netxen_nic.h~git-net drivers/net/netxen/netxen_nic.h --- a/drivers/net/netxen/netxen_nic.h~git-net +++ a/drivers/net/netxen/netxen_nic.h @@ -880,6 +880,7 @@ struct netxen_adapter { struct netxen_adapter *master; struct net_device *netdev; struct pci_dev *pdev; + struct napi_struct napi; struct net_device_stats net_stats; unsigned char mac_addr[ETH_ALEN]; int mtu; diff -puN drivers/net/netxen/netxen_nic_main.c~git-net drivers/net/netxen/netxen_nic_main.c --- a/drivers/net/netxen/netxen_nic_main.c~git-net +++ a/drivers/net/netxen/netxen_nic_main.c @@ -67,7 +67,7 @@ static void netxen_tx_timeout(struct net static void netxen_tx_timeout_task(struct work_struct *work); static void netxen_watchdog(unsigned long); static int netxen_handle_int(struct netxen_adapter *, struct net_device *); -static int netxen_nic_poll(struct net_device *dev, int *budget); +static int netxen_nic_poll(struct napi_struct *napi, int budget); #ifdef CONFIG_NET_POLL_CONTROLLER static void netxen_nic_poll_controller(struct net_device *netdev); #endif @@ -400,6 +400,9 @@ 
netxen_nic_probe(struct pci_dev *pdev, c adapter->netdev = netdev; adapter->pdev = pdev; + netif_napi_add(netdev, &adapter->napi, + netxen_nic_poll, NETXEN_NETDEV_WEIGHT); + /* this will be read from FW later */ adapter->intr_scheme = -1; @@ -420,8 +423,6 @@ netxen_nic_probe(struct pci_dev *pdev, c netxen_nic_change_mtu(netdev, netdev->mtu); SET_ETHTOOL_OPS(netdev, &netxen_nic_ethtool_ops); - netdev->poll = netxen_nic_poll; - netdev->weight = NETXEN_NETDEV_WEIGHT; #ifdef CONFIG_NET_POLL_CONTROLLER netdev->poll_controller = netxen_nic_poll_controller; #endif @@ -888,6 +889,8 @@ static int netxen_nic_open(struct net_de if (!adapter->driver_mismatch) mod_timer(&adapter->watchdog_timer, jiffies); + napi_enable(&adapter->napi); + netxen_nic_enable_int(adapter); /* Done here again so that even if phantom sw overwrote it, @@ -897,6 +900,7 @@ static int netxen_nic_open(struct net_de del_timer_sync(&adapter->watchdog_timer); printk(KERN_ERR "%s: Failed to initialize port %d\n", netxen_nic_driver_name, adapter->portnum); + napi_disable(&adapter->napi); return -EIO; } if (adapter->macaddr_set) @@ -926,6 +930,7 @@ static int netxen_nic_close(struct net_d netif_carrier_off(netdev); netif_stop_queue(netdev); + napi_disable(&adapter->napi); netxen_nic_disable_int(adapter); @@ -1246,11 +1251,11 @@ netxen_handle_int(struct netxen_adapter netxen_nic_disable_int(adapter); if (netxen_nic_rx_has_work(adapter) || netxen_nic_tx_has_work(adapter)) { - if (netif_rx_schedule_prep(netdev)) { + if (netif_rx_schedule_prep(netdev, &adapter->napi)) { /* * Interrupts are already disabled. 
*/ - __netif_rx_schedule(netdev); + __netif_rx_schedule(netdev, &adapter->napi); } else { static unsigned int intcount = 0; if ((++intcount & 0xfff) == 0xfff) @@ -1308,14 +1313,13 @@ irqreturn_t netxen_intr(int irq, void *d return IRQ_HANDLED; } -static int netxen_nic_poll(struct net_device *netdev, int *budget) +static int netxen_nic_poll(struct napi_struct *napi, int budget) { - struct netxen_adapter *adapter = netdev_priv(netdev); - int work_to_do = min(*budget, netdev->quota); + struct netxen_adapter *adapter = container_of(napi, struct netxen_adapter, napi); + struct net_device *netdev = adapter->netdev; int done = 1; int ctx; - int this_work_done; - int work_done = 0; + int work_done; DPRINTK(INFO, "polling for %d descriptors\n", *budget); @@ -1333,16 +1337,11 @@ static int netxen_nic_poll(struct net_de * packets are on one context, it gets only half of the quota, * and ends up not processing it. */ - this_work_done = netxen_process_rcv_ring(adapter, ctx, - work_to_do / - MAX_RCV_CTX); - work_done += this_work_done; + work_done += netxen_process_rcv_ring(adapter, ctx, + budget / MAX_RCV_CTX); } - netdev->quota -= work_done; - *budget -= work_done; - - if (work_done >= work_to_do && netxen_nic_rx_has_work(adapter) != 0) + if (work_done >= budget && netxen_nic_rx_has_work(adapter) != 0) done = 0; if (netxen_process_cmd_ring((unsigned long)adapter) == 0) @@ -1351,11 +1350,11 @@ static int netxen_nic_poll(struct net_de DPRINTK(INFO, "new work_done: %d work_to_do: %d\n", work_done, work_to_do); if (done) { - netif_rx_complete(netdev); + netif_rx_complete(netdev, napi); netxen_nic_enable_int(adapter); } - return !done; + return work_done; } #ifdef CONFIG_NET_POLL_CONTROLLER diff -puN drivers/net/pasemi_mac.c~git-net drivers/net/pasemi_mac.c --- a/drivers/net/pasemi_mac.c~git-net +++ a/drivers/net/pasemi_mac.c @@ -584,7 +584,7 @@ static irqreturn_t pasemi_mac_rx_intr(in if (*mac->rx_status & PAS_STATUS_TIMER) reg |= PAS_IOB_DMA_RXCH_RESET_TINTC; - 
netif_rx_schedule(dev); + netif_rx_schedule(dev, &mac->napi); pci_write_config_dword(mac->iob_pdev, PAS_IOB_DMA_RXCH_RESET(mac->dma_rxch), reg); @@ -808,7 +808,7 @@ static int pasemi_mac_open(struct net_de dev_warn(&mac->pdev->dev, "phy init failed: %d\n", ret); netif_start_queue(dev); - netif_poll_enable(dev); + napi_enable(&mac->napi); /* Interrupts are a bit different for our DMA controller: While * it's got one a regular PCI device header, the interrupt there @@ -845,7 +845,7 @@ static int pasemi_mac_open(struct net_de out_rx_int: free_irq(mac->tx_irq, dev); out_tx_int: - netif_poll_disable(dev); + napi_disable(&mac->napi); netif_stop_queue(dev); pasemi_mac_free_tx_resources(dev); out_tx_resources: @@ -869,6 +869,7 @@ static int pasemi_mac_close(struct net_d } netif_stop_queue(dev); + napi_disable(&mac->napi); /* Clean out any pending buffers */ pasemi_mac_clean_tx(mac); @@ -1047,26 +1048,20 @@ static void pasemi_mac_set_rx_mode(struc } -static int pasemi_mac_poll(struct net_device *dev, int *budget) +static int pasemi_mac_poll(struct napi_struct *napi, int budget) { - int pkts, limit = min(*budget, dev->quota); - struct pasemi_mac *mac = netdev_priv(dev); - - pkts = pasemi_mac_clean_rx(mac, limit); + struct pasemi_mac *mac = container_of(napi, struct pasemi_mac, napi); + struct net_device *dev = mac->netdev; + int pkts; - dev->quota -= pkts; - *budget -= pkts; - - if (pkts < limit) { + pkts = pasemi_mac_clean_rx(mac, budget); + if (pkts < budget) { /* all done, no more packets present */ - netif_rx_complete(dev); + netif_rx_complete(dev, napi); pasemi_mac_restart_rx_intr(mac); - return 0; - } else { - /* used up our quantum, so reschedule */ - return 1; } + return pkts; } static int __devinit @@ -1099,6 +1094,10 @@ pasemi_mac_probe(struct pci_dev *pdev, c mac->netdev = dev; mac->dma_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa007, NULL); + netif_napi_add(dev, &mac->napi, pasemi_mac_poll, 64); + + dev->features = NETIF_F_HW_CSUM; + if (!mac->dma_pdev) { 
dev_err(&pdev->dev, "Can't find DMA Controller\n"); err = -ENODEV; @@ -1150,9 +1149,6 @@ pasemi_mac_probe(struct pci_dev *pdev, c dev->hard_start_xmit = pasemi_mac_start_tx; dev->get_stats = pasemi_mac_get_stats; dev->set_multicast_list = pasemi_mac_set_rx_mode; - dev->weight = 64; - dev->poll = pasemi_mac_poll; - dev->features = NETIF_F_HW_CSUM; /* The dma status structure is located in the I/O bridge, and * is cache coherent. diff -puN drivers/net/pasemi_mac.h~git-net drivers/net/pasemi_mac.h --- a/drivers/net/pasemi_mac.h~git-net +++ a/drivers/net/pasemi_mac.h @@ -56,6 +56,7 @@ struct pasemi_mac { struct pci_dev *dma_pdev; struct pci_dev *iob_pdev; struct phy_device *phydev; + struct napi_struct napi; struct net_device_stats stats; /* Pointer to the cacheable per-channel status registers */ diff -puN drivers/net/pcnet32.c~git-net drivers/net/pcnet32.c --- a/drivers/net/pcnet32.c~git-net +++ a/drivers/net/pcnet32.c @@ -280,6 +280,8 @@ struct pcnet32_private { unsigned int dirty_rx, /* ring entries to be freed. 
*/ dirty_tx; + struct net_device *dev; + struct napi_struct napi; struct net_device_stats stats; char tx_full; char phycount; /* number of phys found */ @@ -440,15 +442,21 @@ static struct pcnet32_access pcnet32_dwi static void pcnet32_netif_stop(struct net_device *dev) { + struct pcnet32_private *lp = netdev_priv(dev); dev->trans_start = jiffies; - netif_poll_disable(dev); +#ifdef CONFIG_PCNET32_NAPI + napi_disable(&lp->napi); +#endif netif_tx_disable(dev); } static void pcnet32_netif_start(struct net_device *dev) { + struct pcnet32_private *lp = netdev_priv(dev); netif_wake_queue(dev); - netif_poll_enable(dev); +#ifdef CONFIG_PCNET32_NAPI + napi_enable(&lp->napi); +#endif } /* @@ -816,7 +824,7 @@ static int pcnet32_set_ringparam(struct if ((1 << i) != lp->rx_ring_size) pcnet32_realloc_rx_ring(dev, lp, i); - dev->weight = lp->rx_ring_size / 2; + lp->napi.weight = lp->rx_ring_size / 2; if (netif_running(dev)) { pcnet32_netif_start(dev); @@ -1255,7 +1263,7 @@ static void pcnet32_rx_entry(struct net_ return; } -static int pcnet32_rx(struct net_device *dev, int quota) +static int pcnet32_rx(struct net_device *dev, int budget) { struct pcnet32_private *lp = netdev_priv(dev); int entry = lp->cur_rx & lp->rx_mod_mask; @@ -1263,7 +1271,7 @@ static int pcnet32_rx(struct net_device int npackets = 0; /* If we own the next entry, it's a new packet. Send it up. 
*/ - while (quota > npackets && (short)le16_to_cpu(rxp->status) >= 0) { + while (npackets < budget && (short)le16_to_cpu(rxp->status) >= 0) { pcnet32_rx_entry(dev, lp, rxp, entry); npackets += 1; /* @@ -1379,15 +1387,16 @@ static int pcnet32_tx(struct net_device } #ifdef CONFIG_PCNET32_NAPI -static int pcnet32_poll(struct net_device *dev, int *budget) +static int pcnet32_poll(struct napi_struct *napi, int budget) { - struct pcnet32_private *lp = netdev_priv(dev); - int quota = min(dev->quota, *budget); + struct pcnet32_private *lp = container_of(napi, struct pcnet32_private, napi); + struct net_device *dev = lp->dev; unsigned long ioaddr = dev->base_addr; unsigned long flags; + int work_done; u16 val; - quota = pcnet32_rx(dev, quota); + work_done = pcnet32_rx(dev, budget); spin_lock_irqsave(&lp->lock, flags); if (pcnet32_tx(dev)) { @@ -1399,28 +1408,22 @@ static int pcnet32_poll(struct net_devic } spin_unlock_irqrestore(&lp->lock, flags); - *budget -= quota; - dev->quota -= quota; - - if (dev->quota == 0) { - return 1; - } - - netif_rx_complete(dev); - - spin_lock_irqsave(&lp->lock, flags); + if (work_done < budget) { + spin_lock_irqsave(&lp->lock, flags); - /* clear interrupt masks */ - val = lp->a.read_csr(ioaddr, CSR3); - val &= 0x00ff; - lp->a.write_csr(ioaddr, CSR3, val); + __netif_rx_complete(dev, napi); - /* Set interrupt enable. */ - lp->a.write_csr(ioaddr, CSR0, CSR0_INTEN); - mmiowb(); - spin_unlock_irqrestore(&lp->lock, flags); + /* clear interrupt masks */ + val = lp->a.read_csr(ioaddr, CSR3); + val &= 0x00ff; + lp->a.write_csr(ioaddr, CSR3, val); - return 0; + /* Set interrupt enable. 
*/ + lp->a.write_csr(ioaddr, CSR0, CSR0_INTEN); + mmiowb(); + spin_unlock_irqrestore(&lp->lock, flags); + } + return work_done; } #endif @@ -1815,6 +1818,8 @@ pcnet32_probe1(unsigned long ioaddr, int } lp->pci_dev = pdev; + lp->dev = dev; + spin_lock_init(&lp->lock); SET_MODULE_OWNER(dev); @@ -1843,6 +1848,10 @@ pcnet32_probe1(unsigned long ioaddr, int lp->mii_if.mdio_read = mdio_read; lp->mii_if.mdio_write = mdio_write; +#ifdef CONFIG_PCNET32_NAPI + netif_napi_add(dev, &lp->napi, pcnet32_poll, lp->rx_ring_size / 2); +#endif + if (fdx && !(lp->options & PCNET32_PORT_ASEL) && ((cards_found >= MAX_UNITS) || full_duplex[cards_found])) lp->options |= PCNET32_PORT_FD; @@ -1953,10 +1962,6 @@ pcnet32_probe1(unsigned long ioaddr, int dev->ethtool_ops = &pcnet32_ethtool_ops; dev->tx_timeout = pcnet32_tx_timeout; dev->watchdog_timeo = (5 * HZ); - dev->weight = lp->rx_ring_size / 2; -#ifdef CONFIG_PCNET32_NAPI - dev->poll = pcnet32_poll; -#endif #ifdef CONFIG_NET_POLL_CONTROLLER dev->poll_controller = pcnet32_poll_controller; @@ -2276,6 +2281,10 @@ static int pcnet32_open(struct net_devic goto err_free_ring; } +#ifdef CONFIG_PCNET32_NAPI + napi_enable(&lp->napi); +#endif + /* Re-initialize the PCNET32, and start it when done. 
*/ lp->a.write_csr(ioaddr, 1, (lp->init_dma_addr & 0xffff)); lp->a.write_csr(ioaddr, 2, (lp->init_dma_addr >> 16)); @@ -2599,18 +2608,18 @@ pcnet32_interrupt(int irq, void *dev_id) /* unlike for the lance, there is no restart needed */ } #ifdef CONFIG_PCNET32_NAPI - if (netif_rx_schedule_prep(dev)) { + if (netif_rx_schedule_prep(dev, &lp->napi)) { u16 val; /* set interrupt masks */ val = lp->a.read_csr(ioaddr, CSR3); val |= 0x5f00; lp->a.write_csr(ioaddr, CSR3, val); mmiowb(); - __netif_rx_schedule(dev); + __netif_rx_schedule(dev, &lp->napi); break; } #else - pcnet32_rx(dev, dev->weight); + pcnet32_rx(dev, lp->napi.weight); if (pcnet32_tx(dev)) { /* reset the chip to clear the error condition, then restart */ lp->a.reset(ioaddr); @@ -2645,6 +2654,9 @@ static int pcnet32_close(struct net_devi del_timer_sync(&lp->watchdog_timer); netif_stop_queue(dev); +#ifdef CONFIG_PCNET32_NAPI + napi_disable(&lp->napi); +#endif spin_lock_irqsave(&lp->lock, flags); diff -puN drivers/net/ps3_gelic_net.c~git-net drivers/net/ps3_gelic_net.c --- a/drivers/net/ps3_gelic_net.c~git-net +++ a/drivers/net/ps3_gelic_net.c @@ -556,7 +556,7 @@ static int gelic_net_stop(struct net_dev { struct gelic_net_card *card = netdev_priv(netdev); - netif_poll_disable(netdev); + napi_disable(&card->napi); netif_stop_queue(netdev); /* turn off DMA, force end */ @@ -988,32 +988,24 @@ refill: * if the quota is exceeded, but the driver has still packets.
* */ -static int gelic_net_poll(struct net_device *netdev, int *budget) +static int gelic_net_poll(struct napi_struct *napi, int budget) { - struct gelic_net_card *card = netdev_priv(netdev); - int packets_to_do, packets_done = 0; - int no_more_packets = 0; - - packets_to_do = min(*budget, netdev->quota); + struct gelic_net_card *card = container_of(napi, struct gelic_net_card, napi); + struct net_device *netdev = card->netdev; + int packets_done = 0; - while (packets_to_do) { - if (gelic_net_decode_one_descr(card)) { - packets_done++; - packets_to_do--; - } else { - /* no more packets for the stack */ - no_more_packets = 1; + while (packets_done < budget) { + if (!gelic_net_decode_one_descr(card)) break; - } + + packets_done++; } - netdev->quota -= packets_done; - *budget -= packets_done; - if (no_more_packets) { - netif_rx_complete(netdev); + + if (packets_done < budget) { + netif_rx_complete(netdev, napi); gelic_net_rx_irq_on(card); - return 0; - } else - return 1; + } + return packets_done; } /** * gelic_net_change_mtu - changes the MTU of an interface @@ -1056,7 +1048,7 @@ static irqreturn_t gelic_net_interrupt(i if (status & GELIC_NET_RXINT) { gelic_net_rx_irq_off(card); - netif_rx_schedule(netdev); + netif_rx_schedule(netdev, &card->napi); } if (status & GELIC_NET_TXINT) { @@ -1160,6 +1152,8 @@ static int gelic_net_open(struct net_dev if (gelic_net_alloc_rx_skbs(card)) goto alloc_skbs_failed; + napi_enable(&card->napi); + card->tx_dma_progress = 0; card->ghiintmask = GELIC_NET_RXINT | GELIC_NET_TXINT; @@ -1361,9 +1355,6 @@ static void gelic_net_setup_netdev_ops(s /* tx watchdog */ netdev->tx_timeout = &gelic_net_tx_timeout; netdev->watchdog_timeo = GELIC_NET_WATCHDOG_TIMEOUT; - /* NAPI */ - netdev->poll = &gelic_net_poll; - netdev->weight = GELIC_NET_NAPI_WEIGHT; netdev->ethtool_ops = &gelic_net_ethtool_ops; } @@ -1391,6 +1382,9 @@ static int gelic_net_setup_netdev(struct gelic_net_setup_netdev_ops(netdev); + netif_napi_add(netdev, &card->napi, + 
gelic_net_poll, GELIC_NET_NAPI_WEIGHT); + netdev->features = NETIF_F_IP_CSUM; status = lv1_net_control(bus_id(card), dev_id(card), diff -puN drivers/net/ps3_gelic_net.h~git-net drivers/net/ps3_gelic_net.h --- a/drivers/net/ps3_gelic_net.h~git-net +++ a/drivers/net/ps3_gelic_net.h @@ -194,6 +194,7 @@ struct gelic_net_descr_chain { struct gelic_net_card { struct net_device *netdev; + struct napi_struct napi; /* * hypervisor requires irq_status should be * 8 bytes aligned, but u64 member is diff -puN drivers/net/qla3xxx.c~git-net drivers/net/qla3xxx.c --- a/drivers/net/qla3xxx.c~git-net +++ a/drivers/net/qla3xxx.c @@ -2302,10 +2302,10 @@ static int ql_tx_rx_clean(struct ql3_ada return work_done; } -static int ql_poll(struct net_device *ndev, int *budget) +static int ql_poll(struct napi_struct *napi, int budget) { - struct ql3_adapter *qdev = netdev_priv(ndev); - int work_to_do = min(*budget, ndev->quota); + struct ql3_adapter *qdev = container_of(napi, struct ql3_adapter, napi); + struct net_device *ndev = qdev->ndev; int rx_cleaned = 0, tx_cleaned = 0; unsigned long hw_flags; struct ql3xxx_port_registers __iomem *port_regs = qdev->mem_map_registers; @@ -2313,16 +2313,13 @@ static int ql_poll(struct net_device *nd if (!netif_carrier_ok(ndev)) goto quit_polling; - ql_tx_rx_clean(qdev, &tx_cleaned, &rx_cleaned, work_to_do); - *budget -= rx_cleaned; - ndev->quota -= rx_cleaned; + ql_tx_rx_clean(qdev, &tx_cleaned, &rx_cleaned, budget); - if( tx_cleaned + rx_cleaned != work_to_do || + if (tx_cleaned + rx_cleaned != budget || !netif_running(ndev)) { quit_polling: - netif_rx_complete(ndev); - spin_lock_irqsave(&qdev->hw_lock, hw_flags); + __netif_rx_complete(ndev, napi); ql_update_small_bufq_prod_index(qdev); ql_update_lrg_bufq_prod_index(qdev); writel(qdev->rsp_consumer_index, @@ -2330,9 +2327,8 @@ quit_polling: spin_unlock_irqrestore(&qdev->hw_lock, hw_flags); ql_enable_interrupts(qdev); - return 0; } - return 1; + return tx_cleaned + rx_cleaned; } static irqreturn_t 
ql3xxx_isr(int irq, void *dev_id) @@ -2382,8 +2378,8 @@ static irqreturn_t ql3xxx_isr(int irq, v spin_unlock(&qdev->adapter_lock); } else if (value & ISP_IMR_DISABLE_CMPL_INT) { ql_disable_interrupts(qdev); - if (likely(netif_rx_schedule_prep(ndev))) { - __netif_rx_schedule(ndev); + if (likely(netif_rx_schedule_prep(ndev, &qdev->napi))) { + __netif_rx_schedule(ndev, &qdev->napi); } } else { return IRQ_NONE; @@ -3609,7 +3605,7 @@ static int ql_adapter_down(struct ql3_ad del_timer_sync(&qdev->adapter_timer); - netif_poll_disable(ndev); + napi_disable(&qdev->napi); if (do_reset) { int soft_reset; @@ -3697,7 +3693,7 @@ static int ql_adapter_up(struct ql3_adap mod_timer(&qdev->adapter_timer, jiffies + HZ * 1); - netif_poll_enable(ndev); + napi_enable(&qdev->napi); ql_enable_interrupts(qdev); return 0; @@ -4053,8 +4049,7 @@ static int __devinit ql3xxx_probe(struct ndev->tx_timeout = ql3xxx_tx_timeout; ndev->watchdog_timeo = 5 * HZ; - ndev->poll = &ql_poll; - ndev->weight = 64; + netif_napi_add(ndev, &qdev->napi, ql_poll, 64); ndev->irq = pdev->irq; diff -puN drivers/net/qla3xxx.h~git-net drivers/net/qla3xxx.h --- a/drivers/net/qla3xxx.h~git-net +++ a/drivers/net/qla3xxx.h @@ -1175,6 +1175,8 @@ struct ql3_adapter { struct pci_dev *pdev; struct net_device *ndev; /* Parent NET device */ + struct napi_struct napi; + /* Hardware information */ u8 chip_rev_id; u8 pci_slot; diff -puN drivers/net/r8169.c~git-net drivers/net/r8169.c --- a/drivers/net/r8169.c~git-net +++ a/drivers/net/r8169.c @@ -384,6 +384,7 @@ struct rtl8169_private { void __iomem *mmio_addr; /* memory map physical address */ struct pci_dev *pci_dev; /* Index of PCI device */ struct net_device *dev; + struct napi_struct napi; struct net_device_stats stats; /* statistics of net device */ spinlock_t lock; /* spin lock flag */ u32 msg_enable; @@ -443,13 +444,13 @@ static void rtl_set_rx_mode(struct net_d static void rtl8169_tx_timeout(struct net_device *dev); static struct net_device_stats *rtl8169_get_stats(struct 
net_device *dev); static int rtl8169_rx_interrupt(struct net_device *, struct rtl8169_private *, - void __iomem *); + void __iomem *, u32 budget); static int rtl8169_change_mtu(struct net_device *dev, int new_mtu); static void rtl8169_down(struct net_device *dev); static void rtl8169_rx_clear(struct rtl8169_private *tp); #ifdef CONFIG_R8169_NAPI -static int rtl8169_poll(struct net_device *dev, int *budget); +static int rtl8169_poll(struct napi_struct *napi, int budget); #endif static const unsigned int rtl8169_rx_config = @@ -1653,8 +1654,7 @@ rtl8169_init_one(struct pci_dev *pdev, c dev->set_mac_address = rtl_set_mac_address; #ifdef CONFIG_R8169_NAPI - dev->poll = rtl8169_poll; - dev->weight = R8169_NAPI_WEIGHT; + netif_napi_add(dev, &tp->napi, rtl8169_poll, R8169_NAPI_WEIGHT); #endif #ifdef CONFIG_R8169_VLAN @@ -1774,6 +1774,10 @@ static int rtl8169_open(struct net_devic if (retval < 0) goto err_release_ring_2; +#ifdef CONFIG_R8169_NAPI + napi_enable(&tp->napi); +#endif + rtl_hw_start(dev); rtl8169_request_timer(dev); @@ -2069,7 +2073,9 @@ static int rtl8169_change_mtu(struct net if (ret < 0) goto out; - netif_poll_enable(dev); +#ifdef CONFIG_R8169_NAPI + napi_enable(&tp->napi); +#endif rtl_hw_start(dev); @@ -2261,11 +2267,15 @@ static void rtl8169_wait_for_quiescence( synchronize_irq(dev->irq); /* Wait for any pending NAPI task to complete */ - netif_poll_disable(dev); +#ifdef CONFIG_R8169_NAPI + napi_disable(&tp->napi); +#endif rtl8169_irq_mask_and_ack(ioaddr); - netif_poll_enable(dev); +#ifdef CONFIG_R8169_NAPI + napi_enable(&tp->napi); +#endif } static void rtl8169_reinit_task(struct work_struct *work) @@ -2309,7 +2319,7 @@ static void rtl8169_reset_task(struct wo rtl8169_wait_for_quiescence(dev); - rtl8169_rx_interrupt(dev, tp, tp->mmio_addr); + rtl8169_rx_interrupt(dev, tp, tp->mmio_addr, ~(u32)0); rtl8169_tx_clear(tp); if (tp->dirty_rx == tp->cur_rx) { @@ -2614,14 +2624,14 @@ out: static int rtl8169_rx_interrupt(struct net_device *dev, struct 
rtl8169_private *tp, - void __iomem *ioaddr) + void __iomem *ioaddr, u32 budget) { unsigned int cur_rx, rx_left; unsigned int delta, count; cur_rx = tp->cur_rx; rx_left = NUM_RX_DESC + tp->dirty_rx - cur_rx; - rx_left = rtl8169_rx_quota(rx_left, (u32) dev->quota); + rx_left = rtl8169_rx_quota(rx_left, budget); for (; rx_left > 0; rx_left--, cur_rx++) { unsigned int entry = cur_rx % NUM_RX_DESC; @@ -2770,8 +2780,8 @@ static irqreturn_t rtl8169_interrupt(int RTL_W16(IntrMask, tp->intr_event & ~tp->napi_event); tp->intr_mask = ~tp->napi_event; - if (likely(netif_rx_schedule_prep(dev))) - __netif_rx_schedule(dev); + if (likely(netif_rx_schedule_prep(dev, &tp->napi))) + __netif_rx_schedule(dev, &tp->napi); else if (netif_msg_intr(tp)) { printk(KERN_INFO "%s: interrupt %04x in poll\n", dev->name, status); @@ -2781,7 +2791,7 @@ static irqreturn_t rtl8169_interrupt(int #else /* Rx interrupt */ if (status & (RxOK | RxOverflow | RxFIFOOver)) - rtl8169_rx_interrupt(dev, tp, ioaddr); + rtl8169_rx_interrupt(dev, tp, ioaddr, ~(u32)0); /* Tx interrupt */ if (status & (TxOK | TxErr)) @@ -2804,20 +2814,18 @@ out: } #ifdef CONFIG_R8169_NAPI -static int rtl8169_poll(struct net_device *dev, int *budget) +static int rtl8169_poll(struct napi_struct *napi, int budget) { - unsigned int work_done, work_to_do = min(*budget, dev->quota); - struct rtl8169_private *tp = netdev_priv(dev); + struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi); + struct net_device *dev = tp->dev; void __iomem *ioaddr = tp->mmio_addr; + int work_done; - work_done = rtl8169_rx_interrupt(dev, tp, ioaddr); + work_done = rtl8169_rx_interrupt(dev, tp, ioaddr, (u32) budget); rtl8169_tx_interrupt(dev, tp, ioaddr); - *budget -= work_done; - dev->quota -= work_done; - - if (work_done < work_to_do) { - netif_rx_complete(dev); + if (work_done < budget) { + netif_rx_complete(dev, napi); tp->intr_mask = 0xffff; /* * 20040426: the barrier is not strictly required but the @@ -2829,7 +2837,7 @@ static 
int rtl8169_poll(struct net_devic RTL_W16(IntrMask, tp->intr_event); } - return (work_done >= work_to_do); + return work_done; } #endif @@ -2858,7 +2866,7 @@ core_down: synchronize_irq(dev->irq); if (!poll_locked) { - netif_poll_disable(dev); + napi_disable(&tp->napi); poll_locked++; } @@ -2896,8 +2904,6 @@ static int rtl8169_close(struct net_devi free_irq(dev->irq, dev); - netif_poll_enable(dev); - pci_free_consistent(pdev, R8169_RX_RING_BYTES, tp->RxDescArray, tp->RxPhyAddr); pci_free_consistent(pdev, R8169_TX_RING_BYTES, tp->TxDescArray, diff -puN drivers/net/s2io.c~git-net drivers/net/s2io.c --- a/drivers/net/s2io.c~git-net +++ a/drivers/net/s2io.c @@ -2568,7 +2568,7 @@ static void free_rx_buffers(struct s2io_ /** * s2io_poll - Rx interrupt handler for NAPI support - * @dev : pointer to the device structure. + * @napi : pointer to the napi structure. * @budget : The number of packets that were budgeted to be processed * during one pass through the 'Poll" function. * Description: @@ -2579,9 +2579,10 @@ static void free_rx_buffers(struct s2io_ * 0 on success and 1 if there are No Rx packets to be processed. 
*/ -static int s2io_poll(struct net_device *dev, int *budget) +static int s2io_poll(struct napi_struct *napi, int budget) { - struct s2io_nic *nic = dev->priv; + struct s2io_nic *nic = container_of(napi, struct s2io_nic, napi); + struct net_device *dev = nic->dev; int pkt_cnt = 0, org_pkts_to_process; struct mac_info *mac_control; struct config_param *config; @@ -2592,9 +2593,7 @@ static int s2io_poll(struct net_device * mac_control = &nic->mac_control; config = &nic->config; - nic->pkts_to_process = *budget; - if (nic->pkts_to_process > dev->quota) - nic->pkts_to_process = dev->quota; + nic->pkts_to_process = budget; org_pkts_to_process = nic->pkts_to_process; writeq(S2IO_MINUS_ONE, &bar0->rx_traffic_int); @@ -2608,12 +2607,8 @@ static int s2io_poll(struct net_device * goto no_rx; } } - if (!pkt_cnt) - pkt_cnt = 1; - dev->quota -= pkt_cnt; - *budget -= pkt_cnt; - netif_rx_complete(dev); + netif_rx_complete(dev, napi); for (i = 0; i < config->rx_ring_num; i++) { if (fill_rx_buffers(nic, i) == -ENOMEM) { @@ -2626,12 +2621,9 @@ static int s2io_poll(struct net_device * writeq(0x0, &bar0->rx_traffic_mask); readl(&bar0->rx_traffic_mask); atomic_dec(&nic->isr_cnt); - return 0; + return pkt_cnt; no_rx: - dev->quota -= pkt_cnt; - *budget -= pkt_cnt; - for (i = 0; i < config->rx_ring_num; i++) { if (fill_rx_buffers(nic, i) == -ENOMEM) { DBG_PRINT(INFO_DBG, "%s:Out of memory", dev->name); @@ -2640,7 +2632,7 @@ no_rx: } } atomic_dec(&nic->isr_cnt); - return 1; + return pkt_cnt; } #ifdef CONFIG_NET_POLL_CONTROLLER @@ -3904,6 +3896,8 @@ static int s2io_open(struct net_device * sp->config.napi = 0; } + napi_enable(&sp->napi); + /* Initialize H/W and enable interrupts */ err = s2io_card_up(sp); if (err) { @@ -3923,6 +3917,7 @@ static int s2io_open(struct net_device * return 0; hw_init_failed: + napi_disable(&sp->napi); if (sp->intr_type == MSI_X) { if (sp->entries) { kfree(sp->entries); @@ -3956,6 +3951,7 @@ static int s2io_close(struct net_device struct s2io_nic *sp = dev->priv; 
netif_stop_queue(dev); + napi_disable(&sp->napi); /* Reset card, kill tasklet and free Tx and Rx buffers. */ s2io_card_down(sp); @@ -4327,8 +4323,8 @@ static irqreturn_t s2io_isr(int irq, voi if (napi) { if (reason & GEN_INTR_RXTRAFFIC) { - if ( likely ( netif_rx_schedule_prep(dev)) ) { - __netif_rx_schedule(dev); + if (likely (netif_rx_schedule_prep(dev, &sp->napi))) { + __netif_rx_schedule(dev, &sp->napi); writeq(S2IO_MINUS_ONE, &bar0->rx_traffic_mask); } else @@ -7322,8 +7318,7 @@ s2io_init_nic(struct pci_dev *pdev, cons * will use eth_mac_addr() for dev->set_mac_address * mac address will be set every time dev->open() is called */ - dev->poll = s2io_poll; - dev->weight = 32; + netif_napi_add(dev, &sp->napi, s2io_poll, 32); #ifdef CONFIG_NET_POLL_CONTROLLER dev->poll_controller = s2io_netpoll; diff -puN drivers/net/s2io.h~git-net drivers/net/s2io.h --- a/drivers/net/s2io.h~git-net +++ a/drivers/net/s2io.h @@ -791,6 +791,7 @@ struct s2io_nic { */ int pkts_to_process; struct net_device *dev; + struct napi_struct napi; struct mac_info mac_control; struct config_param config; struct pci_dev *pdev; @@ -1026,7 +1027,7 @@ static void s2io_set_multicast(struct ne static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp); static void s2io_link(struct s2io_nic * sp, int link); static void s2io_reset(struct s2io_nic * sp); -static int s2io_poll(struct net_device *dev, int *budget); +static int s2io_poll(struct napi_struct *napi, int budget); static void s2io_init_pci(struct s2io_nic * sp); static int s2io_set_mac_addr(struct net_device *dev, u8 * addr); static void s2io_alarm_handle(unsigned long data); diff -puN drivers/net/sb1250-mac.c~git-net drivers/net/sb1250-mac.c --- a/drivers/net/sb1250-mac.c~git-net +++ a/drivers/net/sb1250-mac.c @@ -238,6 +238,7 @@ struct sbmac_softc { */ struct net_device *sbm_dev; /* pointer to linux device */ + struct napi_struct napi; spinlock_t sbm_lock; /* spin lock */ struct timer_list sbm_timer; /* for monitoring MII */ 
struct net_device_stats sbm_stats; @@ -320,7 +321,7 @@ static struct net_device_stats *sbmac_ge static void sbmac_set_rx_mode(struct net_device *dev); static int sbmac_mii_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static int sbmac_close(struct net_device *dev); -static int sbmac_poll(struct net_device *poll_dev, int *budget); +static int sbmac_poll(struct napi_struct *napi, int budget); static int sbmac_mii_poll(struct sbmac_softc *s,int noisy); static int sbmac_mii_probe(struct net_device *dev); @@ -2152,20 +2153,13 @@ static irqreturn_t sbmac_intr(int irq,vo * Transmits on channel 0 */ - if (isr & (M_MAC_INT_CHANNEL << S_MAC_TX_CH0)) { + if (isr & (M_MAC_INT_CHANNEL << S_MAC_TX_CH0)) sbdma_tx_process(sc,&(sc->sbm_txdma), 0); -#ifdef CONFIG_NETPOLL_TRAP - if (netpoll_trap()) { - if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state)) - __netif_schedule(dev); - } -#endif - } if (isr & (M_MAC_INT_CHANNEL << S_MAC_RX_CH0)) { - if (netif_rx_schedule_prep(dev)) { + if (netif_rx_schedule_prep(dev, &sc->napi)) { __raw_writeq(0, sc->sbm_imr); - __netif_rx_schedule(dev); + __netif_rx_schedule(dev, &sc->napi); /* Depend on the exit from poll to reenable intr */ } else { @@ -2468,8 +2462,8 @@ static int sbmac_init(struct net_device dev->do_ioctl = sbmac_mii_ioctl; dev->tx_timeout = sbmac_tx_timeout; dev->watchdog_timeo = TX_TIMEOUT; - dev->poll = sbmac_poll; - dev->weight = 16; + + netif_napi_add(dev, &sc->napi, sbmac_poll, 16); dev->change_mtu = sb1250_change_mtu; #ifdef CONFIG_NET_POLL_CONTROLLER @@ -2535,6 +2529,8 @@ static int sbmac_open(struct net_device return -EINVAL; } + napi_enable(&sc->napi); + /* * Configure default speed */ @@ -2848,6 +2844,8 @@ static int sbmac_close(struct net_device unsigned long flags; int irq; + napi_disable(&sc->napi); + sbmac_set_channel_state(sc,sbmac_state_off); del_timer_sync(&sc->sbm_timer); @@ -2872,26 +2870,17 @@ static int sbmac_close(struct net_device return 0; } -static int sbmac_poll(struct net_device *dev, int 
*budget) +static int sbmac_poll(struct napi_struct *napi, int budget) { - int work_to_do; + struct sbmac_softc *sc = container_of(napi, struct sbmac_softc, napi); + struct net_device *dev = sc->sbm_dev; int work_done; - struct sbmac_softc *sc = netdev_priv(dev); - - work_to_do = min(*budget, dev->quota); - work_done = sbdma_rx_process(sc, &(sc->sbm_rxdma), work_to_do, 1); - - if (work_done > work_to_do) - printk(KERN_ERR "%s exceeded work_to_do budget=%d quota=%d work-done=%d\n", - sc->sbm_dev->name, *budget, dev->quota, work_done); + work_done = sbdma_rx_process(sc, &(sc->sbm_rxdma), budget, 1); sbdma_tx_process(sc, &(sc->sbm_txdma), 1); - *budget -= work_done; - dev->quota -= work_done; - - if (work_done < work_to_do) { - netif_rx_complete(dev); + if (work_done < budget) { + netif_rx_complete(dev, napi); #ifdef CONFIG_SBMAC_COALESCE __raw_writeq(((M_MAC_INT_EOP_COUNT | M_MAC_INT_EOP_TIMER) << S_MAC_TX_CH0) | @@ -2903,7 +2892,7 @@ static int sbmac_poll(struct net_device #endif } - return (work_done >= work_to_do); + return work_done; } #if defined(SBMAC_ETH0_HWADDR) || defined(SBMAC_ETH1_HWADDR) || defined(SBMAC_ETH2_HWADDR) || defined(SBMAC_ETH3_HWADDR) diff -puN drivers/net/sis190.c~git-net drivers/net/sis190.c --- a/drivers/net/sis190.c~git-net +++ a/drivers/net/sis190.c @@ -47,24 +47,13 @@ #define PHY_ID_ANY 0x1f #define MII_REG_ANY 0x1f -#ifdef CONFIG_SIS190_NAPI -#define NAPI_SUFFIX "-NAPI" -#else -#define NAPI_SUFFIX "" -#endif - -#define DRV_VERSION "1.2" NAPI_SUFFIX +#define DRV_VERSION "1.2" #define DRV_NAME "sis190" #define SIS190_DRIVER_NAME DRV_NAME " Gigabit Ethernet driver " DRV_VERSION #define PFX DRV_NAME ": " -#ifdef CONFIG_SIS190_NAPI -#define sis190_rx_skb netif_receive_skb -#define sis190_rx_quota(count, quota) min(count, quota) -#else #define sis190_rx_skb netif_rx #define sis190_rx_quota(count, quota) count -#endif #define MAC_ADDR_LEN 6 @@ -1115,10 +1104,8 @@ static void sis190_down(struct net_devic synchronize_irq(dev->irq); - if 
(!poll_locked) { - netif_poll_disable(dev); + if (!poll_locked) poll_locked++; - } synchronize_sched(); @@ -1137,8 +1124,6 @@ static int sis190_close(struct net_devic free_irq(dev->irq, dev); - netif_poll_enable(dev); - pci_free_consistent(pdev, TX_RING_BYTES, tp->TxDescRing, tp->tx_dma); pci_free_consistent(pdev, RX_RING_BYTES, tp->RxDescRing, tp->rx_dma); diff -puN drivers/net/skge.c~git-net drivers/net/skge.c --- a/drivers/net/skge.c~git-net +++ a/drivers/net/skge.c @@ -2528,7 +2528,7 @@ static int skge_up(struct net_device *de skge_write32(hw, B0_IMSK, hw->intr_mask); spin_unlock_irq(&hw->hw_lock); - netif_poll_enable(dev); + napi_enable(&skge->napi); return 0; free_rx_ring: @@ -2558,7 +2558,7 @@ static int skge_down(struct net_device * if (hw->chip_id == CHIP_ID_GENESIS && hw->phy_type == SK_PHY_XMAC) del_timer_sync(&skge->link_timer); - netif_poll_disable(dev); + napi_disable(&skge->napi); netif_carrier_off(dev); spin_lock_irq(&hw->hw_lock); @@ -3044,14 +3044,13 @@ static void skge_tx_done(struct net_devi } } -static int skge_poll(struct net_device *dev, int *budget) +static int skge_poll(struct napi_struct *napi, int to_do) { - struct skge_port *skge = netdev_priv(dev); + struct skge_port *skge = container_of(napi, struct skge_port, napi); + struct net_device *dev = skge->netdev; struct skge_hw *hw = skge->hw; struct skge_ring *ring = &skge->rx_ring; struct skge_element *e; - unsigned long flags; - int to_do = min(dev->quota, *budget); int work_done = 0; skge_tx_done(dev); @@ -3082,20 +3081,16 @@ static int skge_poll(struct net_device * wmb(); skge_write8(hw, Q_ADDR(rxqaddr[skge->port], Q_CSR), CSR_START); - *budget -= work_done; - dev->quota -= work_done; - - if (work_done >= to_do) - return 1; /* not done */ - - spin_lock_irqsave(&hw->hw_lock, flags); - __netif_rx_complete(dev); - hw->intr_mask |= napimask[skge->port]; - skge_write32(hw, B0_IMSK, hw->intr_mask); - skge_read32(hw, B0_IMSK); - spin_unlock_irqrestore(&hw->hw_lock, flags); + if (work_done < 
to_do) { + spin_lock_irq(&hw->hw_lock); + __netif_rx_complete(dev, napi); + hw->intr_mask |= napimask[skge->port]; + skge_write32(hw, B0_IMSK, hw->intr_mask); + skge_read32(hw, B0_IMSK); + spin_unlock_irq(&hw->hw_lock); + } - return 0; + return work_done; } /* Parity errors seem to happen when Genesis is connected to a switch @@ -3252,8 +3247,9 @@ static irqreturn_t skge_intr(int irq, vo } if (status & (IS_XA1_F|IS_R1_F)) { + struct skge_port *skge = netdev_priv(hw->dev[0]); hw->intr_mask &= ~(IS_XA1_F|IS_R1_F); - netif_rx_schedule(hw->dev[0]); + netif_rx_schedule(hw->dev[0], &skge->napi); } if (status & IS_PA_TO_TX1) @@ -3271,13 +3267,14 @@ static irqreturn_t skge_intr(int irq, vo skge_mac_intr(hw, 0); if (hw->dev[1]) { + struct skge_port *skge = netdev_priv(hw->dev[1]); + if (status & (IS_XA2_F|IS_R2_F)) { hw->intr_mask &= ~(IS_XA2_F|IS_R2_F); - netif_rx_schedule(hw->dev[1]); + netif_rx_schedule(hw->dev[1], &skge->napi); } if (status & IS_PA_TO_RX2) { - struct skge_port *skge = netdev_priv(hw->dev[1]); ++skge->net_stats.rx_over_errors; skge_write16(hw, B3_PA_CTRL, PA_CLR_TO_RX2); } @@ -3569,8 +3566,6 @@ static struct net_device *skge_devinit(s SET_ETHTOOL_OPS(dev, &skge_ethtool_ops); dev->tx_timeout = skge_tx_timeout; dev->watchdog_timeo = TX_WATCHDOG; - dev->poll = skge_poll; - dev->weight = NAPI_WEIGHT; #ifdef CONFIG_NET_POLL_CONTROLLER dev->poll_controller = skge_netpoll; #endif @@ -3580,6 +3575,7 @@ static struct net_device *skge_devinit(s dev->features |= NETIF_F_HIGHDMA; skge = netdev_priv(dev); + netif_napi_add(dev, &skge->napi, skge_poll, NAPI_WEIGHT); skge->netdev = dev; skge->hw = hw; skge->msg_enable = netif_msg_init(debug, default_msg); diff -puN drivers/net/skge.h~git-net drivers/net/skge.h --- a/drivers/net/skge.h~git-net +++ a/drivers/net/skge.h @@ -2448,6 +2448,7 @@ enum pause_status { struct skge_port { struct skge_hw *hw; struct net_device *netdev; + struct napi_struct napi; int port; u32 msg_enable; diff -puN drivers/net/sky2.c~git-net 
drivers/net/sky2.c --- a/drivers/net/sky2.c~git-net +++ a/drivers/net/sky2.c @@ -1107,7 +1107,7 @@ static void sky2_vlan_rx_register(struct u16 port = sky2->port; netif_tx_lock_bh(dev); - netif_poll_disable(sky2->hw->dev[0]); + napi_disable(&hw->napi); sky2->vlgrp = grp; if (grp) { @@ -1122,7 +1122,7 @@ static void sky2_vlan_rx_register(struct TX_VLAN_TAG_OFF); } - netif_poll_enable(sky2->hw->dev[0]); + napi_enable(&hw->napi); netif_tx_unlock_bh(dev); } #endif @@ -1361,9 +1361,13 @@ static int sky2_up(struct net_device *de sky2_prefetch_init(hw, txqaddr[port], sky2->tx_le_map, TX_RING_SIZE - 1); + napi_enable(&hw->napi); + err = sky2_rx_start(sky2); - if (err) + if (err) { + napi_disable(&hw->napi); goto err_out; + } /* Enable interrupts from phy/mac for port */ imask = sky2_read32(hw, B0_IMSK); @@ -1654,6 +1658,8 @@ static int sky2_down(struct net_device * /* Stop more packets from being queued */ netif_stop_queue(dev); + napi_disable(&hw->napi); + /* Disable port IRQ */ imask = sky2_read32(hw, B0_IMSK); imask &= ~portirq_msk[port]; @@ -2003,7 +2009,7 @@ static int sky2_change_mtu(struct net_de dev->trans_start = jiffies; /* prevent tx timeout */ netif_stop_queue(dev); - netif_poll_disable(hw->dev[0]); + napi_disable(&hw->napi); synchronize_irq(hw->pdev->irq); @@ -2030,12 +2036,16 @@ static int sky2_change_mtu(struct net_de err = sky2_rx_start(sky2); sky2_write32(hw, B0_IMSK, imask); + /* Unconditionally re-enable NAPI because even if we + * call dev_close() that will do a napi_disable(). 
+ */ + napi_enable(&hw->napi); + if (err) dev_close(dev); else { gma_write16(hw, port, GM_GP_CTRL, ctl); - netif_poll_enable(hw->dev[0]); netif_wake_queue(dev); } @@ -2555,11 +2565,11 @@ static void sky2_err_intr(struct sky2_hw sky2_le_error(hw, 1, Q_XA2, TX_RING_SIZE); } -static int sky2_poll(struct net_device *dev0, int *budget) +static int sky2_poll(struct napi_struct *napi, int work_limit) { - struct sky2_hw *hw = ((struct sky2_port *) netdev_priv(dev0))->hw; - int work_done; + struct sky2_hw *hw = container_of(napi, struct sky2_hw, napi); u32 status = sky2_read32(hw, B0_Y2_SP_EISR); + int work_done; if (unlikely(status & Y2_IS_ERROR)) sky2_err_intr(hw, status); @@ -2570,31 +2580,27 @@ static int sky2_poll(struct net_device * if (status & Y2_IS_IRQ_PHY2) sky2_phy_intr(hw, 1); - work_done = sky2_status_intr(hw, min(dev0->quota, *budget)); - *budget -= work_done; - dev0->quota -= work_done; + work_done = sky2_status_intr(hw, work_limit); /* More work? */ - if (hw->st_idx != sky2_read16(hw, STAT_PUT_IDX)) - return 1; + if (hw->st_idx == sky2_read16(hw, STAT_PUT_IDX)) { + /* Bug/Errata workaround? + * Need to kick the TX irq moderation timer. + */ + if (sky2_read8(hw, STAT_TX_TIMER_CTRL) == TIM_START) { + sky2_write8(hw, STAT_TX_TIMER_CTRL, TIM_STOP); + sky2_write8(hw, STAT_TX_TIMER_CTRL, TIM_START); + } - /* Bug/Errata workaround? - * Need to kick the TX irq moderation timer. 
- */ - if (sky2_read8(hw, STAT_TX_TIMER_CTRL) == TIM_START) { - sky2_write8(hw, STAT_TX_TIMER_CTRL, TIM_STOP); - sky2_write8(hw, STAT_TX_TIMER_CTRL, TIM_START); + napi_complete(napi); + sky2_read32(hw, B0_Y2_SP_LISR); } - netif_rx_complete(dev0); - - sky2_read32(hw, B0_Y2_SP_LISR); - return 0; + return work_done; } static irqreturn_t sky2_intr(int irq, void *dev_id) { struct sky2_hw *hw = dev_id; - struct net_device *dev0 = hw->dev[0]; u32 status; /* Reading this mask interrupts as side effect */ @@ -2603,8 +2609,8 @@ static irqreturn_t sky2_intr(int irq, vo return IRQ_NONE; prefetch(&hw->st_le[hw->st_idx]); - if (likely(__netif_rx_schedule_prep(dev0))) - __netif_rx_schedule(dev0); + + napi_schedule(&hw->napi); return IRQ_HANDLED; } @@ -2613,10 +2619,8 @@ static irqreturn_t sky2_intr(int irq, vo static void sky2_netpoll(struct net_device *dev) { struct sky2_port *sky2 = netdev_priv(dev); - struct net_device *dev0 = sky2->hw->dev[0]; - if (netif_running(dev) && __netif_rx_schedule_prep(dev0)) - __netif_rx_schedule(dev0); + napi_schedule(&sky2->hw->napi); } #endif @@ -2834,8 +2838,6 @@ static void sky2_restart(struct work_str sky2_write32(hw, B0_IMSK, 0); sky2_read32(hw, B0_IMSK); - netif_poll_disable(hw->dev[0]); - for (i = 0; i < hw->ports; i++) { dev = hw->dev[i]; if (netif_running(dev)) @@ -2844,7 +2846,6 @@ static void sky2_restart(struct work_str sky2_reset(hw); sky2_write32(hw, B0_IMSK, Y2_IS_BASE); - netif_poll_enable(hw->dev[0]); for (i = 0; i < hw->ports; i++) { dev = hw->dev[i]; @@ -3655,8 +3656,8 @@ static struct dentry *sky2_debug; static int sky2_debug_show(struct seq_file *seq, void *v) { struct net_device *dev = seq->private; - const struct sky2_port *sky2 = netdev_priv(dev); - const struct sky2_hw *hw = sky2->hw; + struct sky2_port *sky2 = netdev_priv(dev); + struct sky2_hw *hw = sky2->hw; unsigned port = sky2->port; unsigned idx, last; int sop; @@ -3669,7 +3670,7 @@ static int sky2_debug_show(struct seq_fi sky2_read32(hw, B0_IMSK), sky2_read32(hw, 
B0_Y2_SP_ICR)); - netif_poll_disable(hw->dev[0]); + napi_disable(&hw->napi); last = sky2_read16(hw, STAT_PUT_IDX); if (hw->st_idx == last) @@ -3739,7 +3740,7 @@ static int sky2_debug_show(struct seq_fi last = sky2_read16(hw, Y2_QADDR(rxqaddr[port], PREF_UNIT_PUT_IDX)), sky2_read16(hw, Y2_QADDR(rxqaddr[port], PREF_UNIT_LAST_IDX))); - netif_poll_enable(hw->dev[0]); + napi_enable(&hw->napi); return 0; } @@ -3856,15 +3857,8 @@ static __devinit struct net_device *sky2 SET_ETHTOOL_OPS(dev, &sky2_ethtool_ops); dev->tx_timeout = sky2_tx_timeout; dev->watchdog_timeo = TX_WATCHDOG; - if (port == 0) - dev->poll = sky2_poll; - dev->weight = NAPI_WEIGHT; #ifdef CONFIG_NET_POLL_CONTROLLER - /* Network console (only works on port 0) - * because netpoll makes assumptions about NAPI - */ - if (port == 0) - dev->poll_controller = sky2_netpoll; + dev->poll_controller = sky2_netpoll; #endif sky2 = netdev_priv(dev); @@ -4074,6 +4068,7 @@ static int __devinit sky2_probe(struct p err = -ENOMEM; goto err_out_free_pci; } + netif_napi_add(dev, &hw->napi, sky2_poll, NAPI_WEIGHT); if (!disable_msi && pci_enable_msi(pdev) == 0) { err = sky2_test_msi(hw); @@ -4198,8 +4193,6 @@ static int sky2_suspend(struct pci_dev * if (!hw) return 0; - netif_poll_disable(hw->dev[0]); - for (i = 0; i < hw->ports; i++) { struct net_device *dev = hw->dev[i]; struct sky2_port *sky2 = netdev_priv(dev); @@ -4262,8 +4255,6 @@ static int sky2_resume(struct pci_dev *p } } - netif_poll_enable(hw->dev[0]); - return 0; out: dev_err(&pdev->dev, "resume failed (%d)\n", err); @@ -4280,7 +4271,7 @@ static void sky2_shutdown(struct pci_dev if (!hw) return; - netif_poll_disable(hw->dev[0]); + napi_disable(&hw->napi); for (i = 0; i < hw->ports; i++) { struct net_device *dev = hw->dev[i]; diff -puN drivers/net/sky2.h~git-net drivers/net/sky2.h --- a/drivers/net/sky2.h~git-net +++ a/drivers/net/sky2.h @@ -2024,6 +2024,7 @@ struct sky2_port { struct sky2_hw { void __iomem *regs; struct pci_dev *pdev; + struct napi_struct napi; 
struct net_device *dev[2]; u8 chip_id; diff -puN drivers/net/spider_net.c~git-net drivers/net/spider_net.c --- a/drivers/net/spider_net.c~git-net +++ a/drivers/net/spider_net.c @@ -1278,34 +1278,26 @@ bad_desc: * (using netif_receive_skb). If all/enough packets are up, the driver * reenables interrupts and returns 0. If not, 1 is returned. */ -static int -spider_net_poll(struct net_device *netdev, int *budget) +static int spider_net_poll(struct napi_struct *napi, int budget) { - struct spider_net_card *card = netdev_priv(netdev); - int packets_to_do, packets_done = 0; - int no_more_packets = 0; - - packets_to_do = min(*budget, netdev->quota); + struct spider_net_card *card = container_of(napi, struct spider_net_card, napi); + struct net_device *netdev = card->netdev; + int packets_done = 0; - while (packets_to_do) { - if (spider_net_decode_one_descr(card)) { - packets_done++; - packets_to_do--; - } else { - /* no more packets for the stack */ - no_more_packets = 1; + while (packets_done < budget) { + if (!spider_net_decode_one_descr(card)) break; - } + + packets_done++; } if ((packets_done == 0) && (card->num_rx_ints != 0)) { - no_more_packets = spider_net_resync_tail_ptr(card); + if (!spider_net_resync_tail_ptr(card)) + packets_done = budget; spider_net_resync_head_ptr(card); } card->num_rx_ints = 0; - netdev->quota -= packets_done; - *budget -= packets_done; spider_net_refill_rx_chain(card); spider_net_enable_rxdmac(card); @@ -1313,14 +1305,13 @@ spider_net_poll(struct net_device *netde /* if all packets are in the stack, enable interrupts and return 0 */ /* if not, return 1 */ - if (no_more_packets) { - netif_rx_complete(netdev); + if (packets_done < budget) { + netif_rx_complete(netdev, napi); spider_net_rx_irq_on(card); card->ignore_rx_ramfull = 0; - return 0; } - return 1; + return packets_done; } /** @@ -1563,7 +1554,8 @@ spider_net_handle_error_irq(struct spide spider_net_refill_rx_chain(card); spider_net_enable_rxdmac(card); card->num_rx_ints ++; - 
netif_rx_schedule(card->netdev); + netif_rx_schedule(card->netdev, + &card->napi); } show_error = 0; break; @@ -1583,7 +1575,8 @@ spider_net_handle_error_irq(struct spide spider_net_refill_rx_chain(card); spider_net_enable_rxdmac(card); card->num_rx_ints ++; - netif_rx_schedule(card->netdev); + netif_rx_schedule(card->netdev, + &card->napi); show_error = 0; break; @@ -1597,7 +1590,8 @@ spider_net_handle_error_irq(struct spide spider_net_refill_rx_chain(card); spider_net_enable_rxdmac(card); card->num_rx_ints ++; - netif_rx_schedule(card->netdev); + netif_rx_schedule(card->netdev, + &card->napi); show_error = 0; break; @@ -1690,11 +1684,11 @@ spider_net_interrupt(int irq, void *ptr) if (status_reg & SPIDER_NET_RXINT ) { spider_net_rx_irq_off(card); - netif_rx_schedule(netdev); + netif_rx_schedule(netdev, &card->napi); card->num_rx_ints ++; } if (status_reg & SPIDER_NET_TXINT) - netif_rx_schedule(netdev); + netif_rx_schedule(netdev, &card->napi); if (status_reg & SPIDER_NET_LINKINT) spider_net_link_reset(netdev); @@ -2038,7 +2032,7 @@ spider_net_open(struct net_device *netde netif_start_queue(netdev); netif_carrier_on(netdev); - netif_poll_enable(netdev); + napi_enable(&card->napi); spider_net_enable_interrupts(card); @@ -2208,7 +2202,7 @@ spider_net_stop(struct net_device *netde { struct spider_net_card *card = netdev_priv(netdev); - netif_poll_disable(netdev); + napi_disable(&card->napi); netif_carrier_off(netdev); netif_stop_queue(netdev); del_timer_sync(&card->tx_timer); @@ -2308,9 +2302,6 @@ spider_net_setup_netdev_ops(struct net_d /* tx watchdog */ netdev->tx_timeout = &spider_net_tx_timeout; netdev->watchdog_timeo = SPIDER_NET_WATCHDOG_TIMEOUT; - /* NAPI */ - netdev->poll = &spider_net_poll; - netdev->weight = SPIDER_NET_NAPI_WEIGHT; /* HW VLAN */ #ifdef CONFIG_NET_POLL_CONTROLLER /* poll controller */ @@ -2355,6 +2346,9 @@ spider_net_setup_netdev(struct spider_ne card->options.rx_csum = SPIDER_NET_RX_CSUM_DEFAULT; + netif_napi_add(netdev, &card->napi, + 
spider_net_poll, SPIDER_NET_NAPI_WEIGHT); + spider_net_setup_netdev_ops(netdev); netdev->features = NETIF_F_IP_CSUM | NETIF_F_LLTX; diff -puN drivers/net/spider_net.h~git-net drivers/net/spider_net.h --- a/drivers/net/spider_net.h~git-net +++ a/drivers/net/spider_net.h @@ -466,6 +466,8 @@ struct spider_net_card { struct pci_dev *pdev; struct mii_phy phy; + struct napi_struct napi; + int medium; void __iomem *regs; diff -puN drivers/net/starfire.c~git-net drivers/net/starfire.c --- a/drivers/net/starfire.c~git-net +++ a/drivers/net/starfire.c @@ -178,16 +178,13 @@ static int full_duplex[MAX_UNITS] = {0, #define skb_num_frags(skb) (skb_shinfo(skb)->nr_frags + 1) #ifdef HAVE_NETDEV_POLL -#define init_poll(dev) \ -do { \ - dev->poll = &netdev_poll; \ - dev->weight = max_interrupt_work; \ -} while (0) -#define netdev_rx(dev, ioaddr) \ +#define init_poll(dev, np) \ + netif_napi_add(dev, &np->napi, netdev_poll, max_interrupt_work) +#define netdev_rx(dev, np, ioaddr) \ do { \ u32 intr_enable; \ - if (netif_rx_schedule_prep(dev)) { \ - __netif_rx_schedule(dev); \ + if (netif_rx_schedule_prep(dev, &np->napi)) { \ + __netif_rx_schedule(dev, &np->napi); \ intr_enable = readl(ioaddr + IntrEnable); \ intr_enable &= ~(IntrRxDone | IntrRxEmpty); \ writel(intr_enable, ioaddr + IntrEnable); \ @@ -204,12 +201,12 @@ do { \ } while (0) #define netdev_receive_skb(skb) netif_receive_skb(skb) #define vlan_netdev_receive_skb(skb, vlgrp, vlid) vlan_hwaccel_receive_skb(skb, vlgrp, vlid) -static int netdev_poll(struct net_device *dev, int *budget); +static int netdev_poll(struct napi_struct *napi, int budget); #else /* not HAVE_NETDEV_POLL */ -#define init_poll(dev) +#define init_poll(dev, np) #define netdev_receive_skb(skb) netif_rx(skb) #define vlan_netdev_receive_skb(skb, vlgrp, vlid) vlan_hwaccel_rx(skb, vlgrp, vlid) -#define netdev_rx(dev, ioaddr) \ +#define netdev_rx(dev, np, ioaddr) \ do { \ int quota = np->dirty_rx + RX_RING_SIZE - np->cur_rx; \ __netdev_rx(dev, &quota);\ @@ -599,6 +596,8
@@ struct netdev_private { struct tx_done_desc *tx_done_q; dma_addr_t tx_done_q_dma; unsigned int tx_done; + struct napi_struct napi; + struct net_device *dev; struct net_device_stats stats; struct pci_dev *pci_dev; #ifdef VLAN_SUPPORT @@ -791,6 +790,7 @@ static int __devinit starfire_init_one(s dev->irq = irq; np = netdev_priv(dev); + np->dev = dev; np->base = base; spin_lock_init(&np->lock); pci_set_drvdata(pdev, dev); @@ -851,7 +851,7 @@ static int __devinit starfire_init_one(s dev->hard_start_xmit = &start_tx; dev->tx_timeout = tx_timeout; dev->watchdog_timeo = TX_TIMEOUT; - init_poll(dev); + init_poll(dev, np); dev->stop = &netdev_close; dev->get_stats = &get_stats; dev->set_multicast_list = &set_rx_mode; @@ -1056,6 +1056,9 @@ static int netdev_open(struct net_device writel(np->intr_timer_ctrl, ioaddr + IntrTimerCtrl); +#ifdef HAVE_NETDEV_POLL + napi_enable(&np->napi); +#endif netif_start_queue(dev); if (debug > 1) @@ -1330,7 +1333,7 @@ static irqreturn_t intr_handler(int irq, handled = 1; if (intr_status & (IntrRxDone | IntrRxEmpty)) - netdev_rx(dev, ioaddr); + netdev_rx(dev, np, ioaddr); /* Scavenge the skbuff list based on the Tx-done queue. 
There are redundant checks here that may be cleaned up @@ -1531,36 +1534,35 @@ static int __netdev_rx(struct net_device #ifdef HAVE_NETDEV_POLL -static int netdev_poll(struct net_device *dev, int *budget) +static int netdev_poll(struct napi_struct *napi, int budget) { + struct netdev_private *np = container_of(napi, struct netdev_private, napi); + struct net_device *dev = np->dev; u32 intr_status; - struct netdev_private *np = netdev_priv(dev); void __iomem *ioaddr = np->base; - int retcode = 0, quota = dev->quota; + int quota = budget; do { writel(IntrRxDone | IntrRxEmpty, ioaddr + IntrClear); - retcode = __netdev_rx(dev, &quota); - *budget -= (dev->quota - quota); - dev->quota = quota; - if (retcode) + if (__netdev_rx(dev, &quota)) goto out; intr_status = readl(ioaddr + IntrStatus); } while (intr_status & (IntrRxDone | IntrRxEmpty)); - netif_rx_complete(dev); + netif_rx_complete(dev, napi); intr_status = readl(ioaddr + IntrEnable); intr_status |= IntrRxDone | IntrRxEmpty; writel(intr_status, ioaddr + IntrEnable); out: if (debug > 5) - printk(KERN_DEBUG " exiting netdev_poll(): %d.\n", retcode); + printk(KERN_DEBUG " exiting netdev_poll(): %d.\n", + budget - quota); /* Restart Rx engine if stopped. */ - return retcode; + return budget - quota; } #endif /* HAVE_NETDEV_POLL */ @@ -1904,6 +1906,9 @@ static int netdev_close(struct net_devic int i; netif_stop_queue(dev); +#ifdef HAVE_NETDEV_POLL + napi_disable(&np->napi); +#endif if (debug > 1) { printk(KERN_DEBUG "%s: Shutting down ethercard, Intr status %#8.8x.\n", diff -puN drivers/net/sungem.c~git-net drivers/net/sungem.c --- a/drivers/net/sungem.c~git-net +++ a/drivers/net/sungem.c @@ -19,7 +19,7 @@ * * gem_change_mtu() and gem_set_multicast() are called with a read_lock() * help by net/core/dev.c, thus they can't schedule.
That means they can't - * call netif_poll_disable() neither, thus force gem_poll() to keep a spinlock + * call napi_disable() neither, thus force gem_poll() to keep a spinlock * where it could have been dropped. change_mtu especially would love also to * be able to msleep instead of horrid locked delays when resetting the HW, * but that read_lock() makes it impossible, unless I defer it's action to @@ -878,19 +878,20 @@ static int gem_rx(struct gem *gp, int wo return work_done; } -static int gem_poll(struct net_device *dev, int *budget) +static int gem_poll(struct napi_struct *napi, int budget) { - struct gem *gp = dev->priv; + struct gem *gp = container_of(napi, struct gem, napi); + struct net_device *dev = gp->dev; unsigned long flags; + int work_done; /* * NAPI locking nightmare: See comment at head of driver */ spin_lock_irqsave(&gp->lock, flags); + work_done = 0; do { - int work_to_do, work_done; - /* Handle anomalies */ if (gp->status & GREG_STAT_ABNORMAL) { if (gem_abnormal_irq(dev, gp, gp->status)) @@ -906,29 +907,25 @@ static int gem_poll(struct net_device *d /* Run RX thread. We don't use any locking here, * code willing to do bad things - like cleaning the - * rx ring - must call netif_poll_disable(), which + * rx ring - must call napi_disable(), which * schedule_timeout()'s if polling is already disabled. 
*/ - work_to_do = min(*budget, dev->quota); - - work_done = gem_rx(gp, work_to_do); - - *budget -= work_done; - dev->quota -= work_done; + work_done += gem_rx(gp, budget); - if (work_done >= work_to_do) - return 1; + if (work_done >= budget) + return work_done; spin_lock_irqsave(&gp->lock, flags); gp->status = readl(gp->regs + GREG_STAT); } while (gp->status & GREG_STAT_NAPI); - __netif_rx_complete(dev); + __netif_rx_complete(dev, napi); gem_enable_ints(gp); spin_unlock_irqrestore(&gp->lock, flags); - return 0; + + return work_done; } static irqreturn_t gem_interrupt(int irq, void *dev_id) @@ -946,17 +943,17 @@ static irqreturn_t gem_interrupt(int irq spin_lock_irqsave(&gp->lock, flags); - if (netif_rx_schedule_prep(dev)) { + if (netif_rx_schedule_prep(dev, &gp->napi)) { u32 gem_status = readl(gp->regs + GREG_STAT); if (gem_status == 0) { - netif_poll_enable(dev); + napi_enable(&gp->napi); spin_unlock_irqrestore(&gp->lock, flags); return IRQ_NONE; } gp->status = gem_status; gem_disable_ints(gp); - __netif_rx_schedule(dev); + __netif_rx_schedule(dev, &gp->napi); } spin_unlock_irqrestore(&gp->lock, flags); @@ -2284,7 +2281,7 @@ static void gem_reset_task(struct work_s mutex_lock(&gp->pm_mutex); - netif_poll_disable(gp->dev); + napi_disable(&gp->napi); spin_lock_irq(&gp->lock); spin_lock(&gp->tx_lock); @@ -2307,7 +2304,7 @@ static void gem_reset_task(struct work_s spin_unlock(&gp->tx_lock); spin_unlock_irq(&gp->lock); - netif_poll_enable(gp->dev); + napi_enable(&gp->napi); mutex_unlock(&gp->pm_mutex); } @@ -2324,6 +2321,8 @@ static int gem_open(struct net_device *d if (!gp->asleep) rc = gem_do_start(dev); gp->opened = (rc == 0); + if (gp->opened) + napi_enable(&gp->napi); mutex_unlock(&gp->pm_mutex); @@ -2334,9 +2333,7 @@ static int gem_close(struct net_device * { struct gem *gp = dev->priv; - /* Note: we don't need to call netif_poll_disable() here because - * our caller (dev_close) already did it for us - */ + napi_disable(&gp->napi); mutex_lock(&gp->pm_mutex); @@ 
-2358,7 +2355,7 @@ static int gem_suspend(struct pci_dev *p mutex_lock(&gp->pm_mutex); - netif_poll_disable(dev); + napi_disable(&gp->napi); printk(KERN_INFO "%s: suspending, WakeOnLan %s\n", dev->name, @@ -2482,7 +2479,7 @@ static int gem_resume(struct pci_dev *pd spin_unlock(&gp->tx_lock); spin_unlock_irqrestore(&gp->lock, flags); - netif_poll_enable(dev); + napi_enable(&gp->napi); mutex_unlock(&gp->pm_mutex); @@ -3121,8 +3118,7 @@ static int __devinit gem_init_one(struct dev->get_stats = gem_get_stats; dev->set_multicast_list = gem_set_multicast; dev->do_ioctl = gem_ioctl; - dev->poll = gem_poll; - dev->weight = 64; + netif_napi_add(dev, &gp->napi, gem_poll, 64); dev->ethtool_ops = &gem_ethtool_ops; dev->tx_timeout = gem_tx_timeout; dev->watchdog_timeo = 5 * HZ; diff -puN drivers/net/sungem.h~git-net drivers/net/sungem.h --- a/drivers/net/sungem.h~git-net +++ a/drivers/net/sungem.h @@ -993,6 +993,7 @@ struct gem { u32 msg_enable; u32 status; + struct napi_struct napi; struct net_device_stats net_stats; int tx_fifo_sz; diff -puN drivers/net/tc35815.c~git-net drivers/net/tc35815.c --- a/drivers/net/tc35815.c~git-net +++ a/drivers/net/tc35815.c @@ -414,6 +414,9 @@ enum tc35815_timer_state { struct tc35815_local { struct pci_dev *pci_dev; + struct net_device *dev; + struct napi_struct napi; + /* statistics */ struct net_device_stats stats; struct { @@ -566,7 +569,7 @@ static int tc35815_send_packet(struct sk static irqreturn_t tc35815_interrupt(int irq, void *dev_id); #ifdef TC35815_NAPI static int tc35815_rx(struct net_device *dev, int limit); -static int tc35815_poll(struct net_device *dev, int *budget); +static int tc35815_poll(struct napi_struct *napi, int budget); #else static void tc35815_rx(struct net_device *dev); #endif @@ -685,6 +688,7 @@ static int __devinit tc35815_init_one (s SET_MODULE_OWNER(dev); SET_NETDEV_DEV(dev, &pdev->dev); lp = dev->priv; + lp->dev = dev; /* enable device (incl. 
PCI PM wakeup), and bus-mastering */ rc = pci_enable_device (pdev); @@ -738,8 +742,7 @@ static int __devinit tc35815_init_one (s dev->tx_timeout = tc35815_tx_timeout; dev->watchdog_timeo = TC35815_TX_TIMEOUT; #ifdef TC35815_NAPI - dev->poll = tc35815_poll; - dev->weight = NAPI_WEIGHT; + netif_napi_add(dev, &lp->napi, tc35815_poll, NAPI_WEIGHT); #endif #ifdef CONFIG_NET_POLL_CONTROLLER dev->poll_controller = tc35815_poll_controller; @@ -748,8 +751,6 @@ static int __devinit tc35815_init_one (s dev->irq = pdev->irq; dev->base_addr = (unsigned long) ioaddr; - /* dev->priv/lp zeroed and aligned in alloc_etherdev */ - lp = dev->priv; spin_lock_init(&lp->lock); lp->pci_dev = pdev; lp->boardtype = ent->driver_data; @@ -1237,6 +1238,10 @@ tc35815_open(struct net_device *dev) return -EAGAIN; } +#ifdef TC35815_NAPI + napi_enable(&lp->napi); +#endif + /* Reset the hardware here. Don't forget to set the station address. */ spin_lock_irq(&lp->lock); tc35815_chip_init(dev); @@ -1436,6 +1441,7 @@ static int tc35815_do_interrupt(struct n static irqreturn_t tc35815_interrupt(int irq, void *dev_id) { struct net_device *dev = dev_id; + struct tc35815_local *lp = netdev_priv(dev); struct tc35815_regs __iomem *tr = (struct tc35815_regs __iomem *)dev->base_addr; #ifdef TC35815_NAPI @@ -1444,8 +1450,8 @@ static irqreturn_t tc35815_interrupt(int if (!(dmactl & DMA_IntMask)) { /* disable interrupts */ tc_writel(dmactl | DMA_IntMask, &tr->DMA_Ctl); - if (netif_rx_schedule_prep(dev)) - __netif_rx_schedule(dev); + if (netif_rx_schedule_prep(dev, &lp->napi)) + __netif_rx_schedule(dev, &lp->napi); else { printk(KERN_ERR "%s: interrupt taken in poll\n", dev->name); @@ -1726,13 +1732,12 @@ tc35815_rx(struct net_device *dev) } #ifdef TC35815_NAPI -static int -tc35815_poll(struct net_device *dev, int *budget) +static int tc35815_poll(struct napi_struct *napi, int budget) { - struct tc35815_local *lp = dev->priv; + struct tc35815_local *lp = container_of(napi, struct tc35815_local, napi); + struct 
net_device *dev = lp->dev; struct tc35815_regs __iomem *tr = (struct tc35815_regs __iomem *)dev->base_addr; - int limit = min(*budget, dev->quota); int received = 0, handled; u32 status; @@ -1744,23 +1749,19 @@ tc35815_poll(struct net_device *dev, int handled = tc35815_do_interrupt(dev, status, limit); if (handled >= 0) { received += handled; - limit -= handled; - if (limit <= 0) + if (received >= budget) break; } status = tc_readl(&tr->Int_Src); } while (status); spin_unlock(&lp->lock); - dev->quota -= received; - *budget -= received; - if (limit <= 0) - return 1; - - netif_rx_complete(dev); - /* enable interrupts */ - tc_writel(tc_readl(&tr->DMA_Ctl) & ~DMA_IntMask, &tr->DMA_Ctl); - return 0; + if (received < budget) { + netif_rx_complete(dev, napi); + /* enable interrupts */ + tc_writel(tc_readl(&tr->DMA_Ctl) & ~DMA_IntMask, &tr->DMA_Ctl); + } + return received; } #endif @@ -1949,7 +1950,11 @@ static int tc35815_close(struct net_device *dev) { struct tc35815_local *lp = dev->priv; + netif_stop_queue(dev); +#ifdef TC35815_NAPI + napi_disable(&lp->napi); +#endif /* Flush the Tx and disable Rx here. 
*/ diff -puN drivers/net/tg3.c~git-net drivers/net/tg3.c --- a/drivers/net/tg3.c~git-net +++ a/drivers/net/tg3.c @@ -574,7 +574,7 @@ static void tg3_restart_ints(struct tg3 static inline void tg3_netif_stop(struct tg3 *tp) { tp->dev->trans_start = jiffies; /* prevent tx timeout */ - netif_poll_disable(tp->dev); + napi_disable(&tp->napi); netif_tx_disable(tp->dev); } @@ -585,7 +585,7 @@ static inline void tg3_netif_start(struc * so long as all callers are assured to have free tx slots * (such as after tg3_init_hw) */ - netif_poll_enable(tp->dev); + napi_enable(&tp->napi); tp->hw_status->status |= SD_STATUS_UPDATED; tg3_enable_ints(tp); } @@ -3471,11 +3471,12 @@ next_pkt_nopost: return received; } -static int tg3_poll(struct net_device *netdev, int *budget) +static int tg3_poll(struct napi_struct *napi, int budget) { - struct tg3 *tp = netdev_priv(netdev); + struct tg3 *tp = container_of(napi, struct tg3, napi); + struct net_device *netdev = tp->dev; struct tg3_hw_status *sblk = tp->hw_status; - int done; + int work_done = 0; /* handle link change and other phy events */ if (!(tp->tg3_flags & @@ -3494,7 +3495,7 @@ static int tg3_poll(struct net_device *n if (sblk->idx[0].tx_consumer != tp->tx_cons) { tg3_tx(tp); if (unlikely(tp->tg3_flags & TG3_FLAG_TX_RECOVERY_PENDING)) { - netif_rx_complete(netdev); + netif_rx_complete(netdev, napi); schedule_work(&tp->reset_task); return 0; } @@ -3502,20 +3503,10 @@ static int tg3_poll(struct net_device *n /* run RX thread, within the bounds set by NAPI. 
* All RX "locking" is done by ensuring outside - * code synchronizes with dev->poll() + * code synchronizes with tg3->napi.poll() */ - if (sblk->idx[0].rx_producer != tp->rx_rcb_ptr) { - int orig_budget = *budget; - int work_done; - - if (orig_budget > netdev->quota) - orig_budget = netdev->quota; - - work_done = tg3_rx(tp, orig_budget); - - *budget -= work_done; - netdev->quota -= work_done; - } + if (sblk->idx[0].rx_producer != tp->rx_rcb_ptr) + work_done = tg3_rx(tp, budget); if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS) { tp->last_tag = sblk->status_tag; @@ -3524,13 +3515,12 @@ static int tg3_poll(struct net_device *n sblk->status &= ~SD_STATUS_UPDATED; /* if no more work, tell net stack and NIC we're done */ - done = !tg3_has_work(tp); - if (done) { - netif_rx_complete(netdev); + if (!tg3_has_work(tp)) { + netif_rx_complete(netdev, napi); tg3_restart_ints(tp); } - return (done ? 0 : 1); + return work_done; } static void tg3_irq_quiesce(struct tg3 *tp) @@ -3577,7 +3567,7 @@ static irqreturn_t tg3_msi_1shot(int irq prefetch(&tp->rx_rcb[tp->rx_rcb_ptr]); if (likely(!tg3_irq_sync(tp))) - netif_rx_schedule(dev); /* schedule NAPI poll */ + netif_rx_schedule(dev, &tp->napi); return IRQ_HANDLED; } @@ -3602,7 +3592,7 @@ static irqreturn_t tg3_msi(int irq, void */ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001); if (likely(!tg3_irq_sync(tp))) - netif_rx_schedule(dev); /* schedule NAPI poll */ + netif_rx_schedule(dev, &tp->napi); return IRQ_RETVAL(1); } @@ -3644,7 +3634,7 @@ static irqreturn_t tg3_interrupt(int irq sblk->status &= ~SD_STATUS_UPDATED; if (likely(tg3_has_work(tp))) { prefetch(&tp->rx_rcb[tp->rx_rcb_ptr]); - netif_rx_schedule(dev); /* schedule NAPI poll */ + netif_rx_schedule(dev, &tp->napi); } else { /* No work, shared interrupt perhaps? 
re-enable * interrupts, and flush that PCI write @@ -3690,7 +3680,7 @@ static irqreturn_t tg3_interrupt_tagged( tw32_mailbox_f(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001); if (tg3_irq_sync(tp)) goto out; - if (netif_rx_schedule_prep(dev)) { + if (netif_rx_schedule_prep(dev, &tp->napi)) { prefetch(&tp->rx_rcb[tp->rx_rcb_ptr]); /* Update last_tag to mark that this status has been * seen. Because interrupt may be shared, we may be @@ -3698,7 +3688,7 @@ static irqreturn_t tg3_interrupt_tagged( * if tg3_poll() is not scheduled. */ tp->last_tag = sblk->status_tag; - __netif_rx_schedule(dev); + __netif_rx_schedule(dev, &tp->napi); } out: return IRQ_RETVAL(handled); @@ -3737,7 +3727,7 @@ static int tg3_restart_hw(struct tg3 *tp tg3_full_unlock(tp); del_timer_sync(&tp->timer); tp->irq_sync = 0; - netif_poll_enable(tp->dev); + napi_enable(&tp->napi); dev_close(tp->dev); tg3_full_lock(tp, 0); } @@ -3932,7 +3922,7 @@ static int tg3_start_xmit(struct sk_buff len = skb_headlen(skb); /* We are running in BH disabled context with netif_tx_lock - * and TX reclaim runs via tp->poll inside of a software + * and TX reclaim runs via tp->napi.poll inside of a software * interrupt. Furthermore, IRQ processing runs lockless so we have * no IRQ context deadlocks to worry about either. Rejoice! */ @@ -4087,7 +4077,7 @@ static int tg3_start_xmit_dma_bug(struct len = skb_headlen(skb); /* We are running in BH disabled context with netif_tx_lock - * and TX reclaim runs via tp->poll inside of a software + * and TX reclaim runs via tp->napi.poll inside of a software * interrupt. Furthermore, IRQ processing runs lockless so we have * no IRQ context deadlocks to worry about either. Rejoice! 
*/ @@ -7143,6 +7133,8 @@ static int tg3_open(struct net_device *d return err; } + napi_enable(&tp->napi); + tg3_full_lock(tp, 0); err = tg3_init_hw(tp, 1); @@ -7170,6 +7162,7 @@ static int tg3_open(struct net_device *d tg3_full_unlock(tp); if (err) { + napi_disable(&tp->napi); free_irq(tp->pdev->irq, dev); if (tp->tg3_flags2 & TG3_FLG2_USING_MSI) { pci_disable_msi(tp->pdev); @@ -7195,6 +7188,8 @@ static int tg3_open(struct net_device *d tg3_full_unlock(tp); + napi_disable(&tp->napi); + return err; } @@ -7456,6 +7451,7 @@ static int tg3_close(struct net_device * { struct tg3 *tp = netdev_priv(dev); + napi_disable(&tp->napi); cancel_work_sync(&tp->reset_task); netif_stop_queue(dev); @@ -11896,9 +11892,8 @@ static int __devinit tg3_init_one(struct dev->set_mac_address = tg3_set_mac_addr; dev->do_ioctl = tg3_ioctl; dev->tx_timeout = tg3_tx_timeout; - dev->poll = tg3_poll; + netif_napi_add(dev, &tp->napi, tg3_poll, 64); dev->ethtool_ops = &tg3_ethtool_ops; - dev->weight = 64; dev->watchdog_timeo = TG3_TX_TIMEOUT; dev->change_mtu = tg3_change_mtu; dev->irq = pdev->irq; diff -puN drivers/net/tg3.h~git-net drivers/net/tg3.h --- a/drivers/net/tg3.h~git-net +++ a/drivers/net/tg3.h @@ -2176,6 +2176,7 @@ struct tg3 { dma_addr_t tx_desc_mapping; /* begin "rx thread" cacheline section */ + struct napi_struct napi; void (*write32_rx_mbox) (struct tg3 *, u32, u32); u32 rx_rcb_ptr; diff -puN drivers/net/tsi108_eth.c~git-net drivers/net/tsi108_eth.c --- a/drivers/net/tsi108_eth.c~git-net +++ a/drivers/net/tsi108_eth.c @@ -78,6 +78,9 @@ struct tsi108_prv_data { void __iomem *regs; /* Base of normal regs */ void __iomem *phyregs; /* Base of register bank used for PHY access */ + struct net_device *dev; + struct napi_struct napi; + unsigned int phy; /* Index of PHY for this interface */ unsigned int irq_num; unsigned int id; @@ -836,13 +839,13 @@ static int tsi108_refill_rx(struct net_d return done; } -static int tsi108_poll(struct net_device *dev, int *budget) +static int 
tsi108_poll(struct napi_struct *napi, int budget) { - struct tsi108_prv_data *data = netdev_priv(dev); + struct tsi108_prv_data *data = container_of(napi, struct tsi108_prv_data, napi); + struct net_device *dev = data->dev; u32 estat = TSI_READ(TSI108_EC_RXESTAT); u32 intstat = TSI_READ(TSI108_EC_INTSTAT); - int total_budget = min(*budget, dev->quota); - int num_received = 0, num_filled = 0, budget_used; + int num_received = 0, num_filled = 0; intstat &= TSI108_INT_RXQUEUE0 | TSI108_INT_RXTHRESH | TSI108_INT_RXOVERRUN | TSI108_INT_RXERROR | TSI108_INT_RXWAIT; @@ -851,7 +854,7 @@ static int tsi108_poll(struct net_device TSI_WRITE(TSI108_EC_INTSTAT, intstat); if (data->rxpending || (estat & TSI108_EC_RXESTAT_Q0_DESCINT)) - num_received = tsi108_complete_rx(dev, total_budget); + num_received = tsi108_complete_rx(dev, budget); /* This should normally fill no more slots than the number of * packets received in tsi108_complete_rx(). The exception @@ -866,7 +869,7 @@ static int tsi108_poll(struct net_device */ if (data->rxfree < TSI108_RXRING_LEN) - num_filled = tsi108_refill_rx(dev, total_budget * 2); + num_filled = tsi108_refill_rx(dev, budget * 2); if (intstat & TSI108_INT_RXERROR) { u32 err = TSI_READ(TSI108_EC_RXERR); @@ -889,14 +892,9 @@ static int tsi108_poll(struct net_device spin_unlock_irq(&data->misclock); } - budget_used = max(num_received, num_filled / 2); - - *budget -= budget_used; - dev->quota -= budget_used; - - if (budget_used != total_budget) { + if (num_received < budget) { data->rxpending = 0; - netif_rx_complete(dev); + netif_rx_complete(dev, napi); TSI_WRITE(TSI108_EC_INTMASK, TSI_READ(TSI108_EC_INTMASK) @@ -905,14 +903,11 @@ static int tsi108_poll(struct net_device TSI108_INT_RXOVERRUN | TSI108_INT_RXERROR | TSI108_INT_RXWAIT)); - - /* IRQs are level-triggered, so no need to re-check */ - return 0; } else { data->rxpending = 1; } - return 1; + return num_received; } static void tsi108_rx_int(struct net_device *dev) @@ -930,7 +925,7 @@ static void 
tsi108_rx_int(struct net_dev * from tsi108_check_rxring(). */ - if (netif_rx_schedule_prep(dev)) { + if (netif_rx_schedule_prep(dev, &data->napi)) { /* Mask, rather than ack, the receive interrupts. The ack * will happen in tsi108_poll(). */ @@ -941,7 +936,7 @@ static void tsi108_rx_int(struct net_dev | TSI108_INT_RXTHRESH | TSI108_INT_RXOVERRUN | TSI108_INT_RXERROR | TSI108_INT_RXWAIT); - __netif_rx_schedule(dev); + __netif_rx_schedule(dev, &data->napi); } else { if (!netif_running(dev)) { /* This can happen if an interrupt occurs while the @@ -1400,6 +1395,8 @@ static int tsi108_open(struct net_device TSI_WRITE(TSI108_EC_TXQ_PTRLOW, data->txdma); tsi108_init_phy(dev); + napi_enable(&data->napi); + setup_timer(&data->timer, tsi108_timed_checker, (unsigned long)dev); mod_timer(&data->timer, jiffies + 1); @@ -1424,6 +1421,7 @@ static int tsi108_close(struct net_devic struct tsi108_prv_data *data = netdev_priv(dev); netif_stop_queue(dev); + napi_disable(&data->napi); del_timer_sync(&data->timer); @@ -1561,6 +1559,7 @@ tsi108_init_one(struct platform_device * printk("tsi108_eth%d: probe...\n", pdev->id); data = netdev_priv(dev); + data->dev = dev; pr_debug("tsi108_eth%d:regs:phyresgs:phy:irq_num=0x%x:0x%x:0x%x:0x%x\n", pdev->id, einfo->regs, einfo->phyregs, @@ -1596,9 +1595,8 @@ tsi108_init_one(struct platform_device * dev->set_mac_address = tsi108_set_mac; dev->set_multicast_list = tsi108_set_rx_mode; dev->get_stats = tsi108_get_stats; - dev->poll = tsi108_poll; + netif_napi_add(dev, &data->napi, tsi108_poll, 64); dev->do_ioctl = tsi108_do_ioctl; - dev->weight = 64; /* 64 is more suitable for GigE interface - klai */ /* Apparently, the Linux networking code won't use scatter-gather * if the hardware doesn't do checksums. 
However, it's faster diff -puN drivers/net/tulip/interrupt.c~git-net drivers/net/tulip/interrupt.c --- a/drivers/net/tulip/interrupt.c~git-net +++ a/drivers/net/tulip/interrupt.c @@ -103,28 +103,29 @@ int tulip_refill_rx(struct net_device *d void oom_timer(unsigned long data) { struct net_device *dev = (struct net_device *)data; - netif_rx_schedule(dev); + struct tulip_private *tp = netdev_priv(dev); + netif_rx_schedule(dev, &tp->napi); } -int tulip_poll(struct net_device *dev, int *budget) +int tulip_poll(struct napi_struct *napi, int budget) { - struct tulip_private *tp = netdev_priv(dev); + struct tulip_private *tp = container_of(napi, struct tulip_private, napi); + struct net_device *dev = tp->dev; int entry = tp->cur_rx % RX_RING_SIZE; - int rx_work_limit = *budget; + int work_done = 0; +#ifdef CONFIG_TULIP_NAPI_HW_MITIGATION int received = 0; +#endif if (!netif_running(dev)) goto done; - if (rx_work_limit > dev->quota) - rx_work_limit = dev->quota; - #ifdef CONFIG_TULIP_NAPI_HW_MITIGATION /* that one buffer is needed for mit activation; or might be a bug in the ring buffer code; check later -- JHS*/ - if (rx_work_limit >=RX_RING_SIZE) rx_work_limit--; + if (budget >=RX_RING_SIZE) budget--; #endif if (tulip_debug > 4) @@ -144,14 +145,13 @@ int tulip_poll(struct net_device *dev, i while ( ! 
(tp->rx_ring[entry].status & cpu_to_le32(DescOwned))) { s32 status = le32_to_cpu(tp->rx_ring[entry].status); - if (tp->dirty_rx + RX_RING_SIZE == tp->cur_rx) break; if (tulip_debug > 5) printk(KERN_DEBUG "%s: In tulip_rx(), entry %d %8.8x.\n", dev->name, entry, status); - if (--rx_work_limit < 0) + if (work_done++ >= budget) goto not_done; if ((status & 0x38008300) != 0x0300) { @@ -238,7 +238,9 @@ int tulip_poll(struct net_device *dev, i tp->stats.rx_packets++; tp->stats.rx_bytes += pkt_len; } - received++; +#ifdef CONFIG_TULIP_NAPI_HW_MITIGATION + received++; +#endif entry = (++tp->cur_rx) % RX_RING_SIZE; if (tp->cur_rx - tp->dirty_rx > RX_RING_SIZE/4) @@ -296,17 +298,15 @@ done: #endif /* CONFIG_TULIP_NAPI_HW_MITIGATION */ - dev->quota -= received; - *budget -= received; - tulip_refill_rx(dev); /* If RX ring is not full we are out of memory. */ - if (tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL) goto oom; + if (tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL) + goto oom; /* Remove us from polling list and enable RX intr. */ - netif_rx_complete(dev); + netif_rx_complete(dev, napi); iowrite32(tulip_tbl[tp->chip_id].valid_intrs, tp->base_addr+CSR7); /* The last op happens after poll completion. Which means the following: @@ -320,28 +320,20 @@ done: * processed irqs. But it must not result in losing events. */ - return 0; + return work_done; not_done: - if (!received) { - - received = dev->quota; /* Not to happen */ - } - dev->quota -= received; - *budget -= received; - if (tp->cur_rx - tp->dirty_rx > RX_RING_SIZE/2 || tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL) tulip_refill_rx(dev); - if (tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL) goto oom; - - return 1; + if (tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL) + goto oom; + return work_done; oom: /* Executed with RX ints disabled */ - /* Start timer, stop polling, but do not enable rx interrupts. 
*/ mod_timer(&tp->oom_timer, jiffies+1); @@ -350,9 +342,9 @@ done: * before we did netif_rx_complete(). See? We would lose it. */ /* remove ourselves from the polling list */ - netif_rx_complete(dev); + netif_rx_complete(dev, napi); - return 0; + return work_done; } #else /* CONFIG_TULIP_NAPI */ @@ -534,7 +526,7 @@ irqreturn_t tulip_interrupt(int irq, voi rxd++; /* Mask RX intrs and add the device to poll list. */ iowrite32(tulip_tbl[tp->chip_id].valid_intrs&~RxPollInt, ioaddr + CSR7); - netif_rx_schedule(dev); + netif_rx_schedule(dev, &tp->napi); if (!(csr5&~(AbnormalIntr|NormalIntr|RxPollInt|TPLnkPass))) break; diff -puN drivers/net/tulip/tulip.h~git-net drivers/net/tulip/tulip.h --- a/drivers/net/tulip/tulip.h~git-net +++ a/drivers/net/tulip/tulip.h @@ -353,6 +353,7 @@ struct tulip_private { int chip_id; int revision; int flags; + struct napi_struct napi; struct net_device_stats stats; struct timer_list timer; /* Media selection timer. */ struct timer_list oom_timer; /* Out of memory timer. */ @@ -429,7 +430,7 @@ extern int tulip_rx_copybreak; irqreturn_t tulip_interrupt(int irq, void *dev_instance); int tulip_refill_rx(struct net_device *dev); #ifdef CONFIG_TULIP_NAPI -int tulip_poll(struct net_device *dev, int *budget); +int tulip_poll(struct napi_struct *napi, int budget); #endif diff -puN drivers/net/tulip/tulip_core.c~git-net drivers/net/tulip/tulip_core.c --- a/drivers/net/tulip/tulip_core.c~git-net +++ a/drivers/net/tulip/tulip_core.c @@ -294,6 +294,10 @@ static void tulip_up(struct net_device * int next_tick = 3*HZ; int i; +#ifdef CONFIG_TULIP_NAPI + napi_enable(&tp->napi); +#endif + /* Wake the chip from sleep/snooze mode. 
*/ tulip_set_power_state (tp, 0, 0); @@ -728,6 +732,10 @@ static void tulip_down (struct net_devic flush_scheduled_work(); +#ifdef CONFIG_TULIP_NAPI + napi_disable(&tp->napi); +#endif + del_timer_sync (&tp->timer); #ifdef CONFIG_TULIP_NAPI del_timer_sync (&tp->oom_timer); @@ -1606,8 +1614,7 @@ static int __devinit tulip_init_one (str dev->tx_timeout = tulip_tx_timeout; dev->watchdog_timeo = TX_TIMEOUT; #ifdef CONFIG_TULIP_NAPI - dev->poll = tulip_poll; - dev->weight = 16; + netif_napi_add(dev, &tp->napi, tulip_poll, 16); #endif dev->stop = tulip_close; dev->get_stats = tulip_get_stats; diff -puN drivers/net/typhoon.c~git-net drivers/net/typhoon.c --- a/drivers/net/typhoon.c~git-net +++ a/drivers/net/typhoon.c @@ -284,6 +284,7 @@ struct typhoon { struct basic_ring rxLoRing; struct pci_dev * pdev; struct net_device * dev; + struct napi_struct napi; spinlock_t state_lock; struct vlan_group * vlgrp; struct basic_ring rxHiRing; @@ -1759,12 +1760,12 @@ typhoon_fill_free_ring(struct typhoon *t } static int -typhoon_poll(struct net_device *dev, int *total_budget) +typhoon_poll(struct napi_struct *napi, int budget) { - struct typhoon *tp = netdev_priv(dev); + struct typhoon *tp = container_of(napi, struct typhoon, napi); + struct net_device *dev = tp->dev; struct typhoon_indexes *indexes = tp->indexes; - int orig_budget = *total_budget; - int budget, work_done, done; + int work_done; rmb(); if(!tp->awaiting_resp && indexes->respReady != indexes->respCleared) @@ -1773,30 +1774,16 @@ typhoon_poll(struct net_device *dev, int if(le32_to_cpu(indexes->txLoCleared) != tp->txLoRing.lastRead) typhoon_tx_complete(tp, &tp->txLoRing, &indexes->txLoCleared); - if(orig_budget > dev->quota) - orig_budget = dev->quota; - - budget = orig_budget; work_done = 0; - done = 1; if(indexes->rxHiCleared != indexes->rxHiReady) { - work_done = typhoon_rx(tp, &tp->rxHiRing, &indexes->rxHiReady, + work_done += typhoon_rx(tp, &tp->rxHiRing, &indexes->rxHiReady, &indexes->rxHiCleared, budget); - budget 
-= work_done; } if(indexes->rxLoCleared != indexes->rxLoReady) { work_done += typhoon_rx(tp, &tp->rxLoRing, &indexes->rxLoReady, - &indexes->rxLoCleared, budget); - } - - if(work_done) { - *total_budget -= work_done; - dev->quota -= work_done; - - if(work_done >= orig_budget) - done = 0; + &indexes->rxLoCleared, budget - work_done); } if(le32_to_cpu(indexes->rxBuffCleared) == tp->rxBuffRing.lastWrite) { @@ -1804,14 +1791,14 @@ typhoon_poll(struct net_device *dev, int typhoon_fill_free_ring(tp); } - if(done) { - netif_rx_complete(dev); + if (work_done < budget) { + netif_rx_complete(dev, napi); iowrite32(TYPHOON_INTR_NONE, tp->ioaddr + TYPHOON_REG_INTR_MASK); typhoon_post_pci_writes(tp->ioaddr); } - return (done ? 0 : 1); + return work_done; } static irqreturn_t @@ -1828,10 +1815,10 @@ typhoon_interrupt(int irq, void *dev_ins iowrite32(intr_status, ioaddr + TYPHOON_REG_INTR_STATUS); - if(netif_rx_schedule_prep(dev)) { + if (netif_rx_schedule_prep(dev, &tp->napi)) { iowrite32(TYPHOON_INTR_ALL, ioaddr + TYPHOON_REG_INTR_MASK); typhoon_post_pci_writes(ioaddr); - __netif_rx_schedule(dev); + __netif_rx_schedule(dev, &tp->napi); } else { printk(KERN_ERR "%s: Error, poll already scheduled\n", dev->name); @@ -2119,9 +2106,13 @@ typhoon_open(struct net_device *dev) if(err < 0) goto out_sleep; + napi_enable(&tp->napi); + err = typhoon_start_runtime(tp); - if(err < 0) + if(err < 0) { + napi_disable(&tp->napi); goto out_irq; + } netif_start_queue(dev); return 0; @@ -2150,6 +2141,7 @@ typhoon_close(struct net_device *dev) struct typhoon *tp = netdev_priv(dev); netif_stop_queue(dev); + napi_disable(&tp->napi); if(typhoon_stop_runtime(tp, WaitSleep) < 0) printk(KERN_ERR "%s: unable to stop runtime\n", dev->name); @@ -2521,8 +2513,7 @@ typhoon_init_one(struct pci_dev *pdev, c dev->stop = typhoon_close; dev->set_multicast_list = typhoon_set_rx_mode; dev->tx_timeout = typhoon_tx_timeout; - dev->poll = typhoon_poll; - dev->weight = 16; + netif_napi_add(dev, &tp->napi, typhoon_poll, 
16); dev->watchdog_timeo = TX_TIMEOUT; dev->get_stats = typhoon_get_stats; dev->set_mac_address = typhoon_set_mac_address; diff -puN drivers/net/ucc_geth.c~git-net drivers/net/ucc_geth.c --- a/drivers/net/ucc_geth.c~git-net +++ a/drivers/net/ucc_geth.c @@ -3582,41 +3582,31 @@ static int ucc_geth_tx(struct net_device } #ifdef CONFIG_UGETH_NAPI -static int ucc_geth_poll(struct net_device *dev, int *budget) +static int ucc_geth_poll(struct napi_struct *napi, int budget) { - struct ucc_geth_private *ugeth = netdev_priv(dev); + struct ucc_geth_private *ugeth = container_of(napi, struct ucc_geth_private, napi); + struct net_device *dev = ugeth->dev; struct ucc_geth_info *ug_info; - struct ucc_fast_private *uccf; - int howmany; - u8 i; - int rx_work_limit; - register u32 uccm; + int howmany, i; ug_info = ugeth->ug_info; - rx_work_limit = *budget; - if (rx_work_limit > dev->quota) - rx_work_limit = dev->quota; - howmany = 0; + for (i = 0; i < ug_info->numQueuesRx; i++) + howmany += ucc_geth_rx(ugeth, i, budget - howmany); - for (i = 0; i < ug_info->numQueuesRx; i++) { - howmany += ucc_geth_rx(ugeth, i, rx_work_limit); - } + if (howmany < budget) { + struct ucc_fast_private *uccf; + u32 uccm; - dev->quota -= howmany; - rx_work_limit -= howmany; - *budget -= howmany; - - if (rx_work_limit > 0) { - netif_rx_complete(dev); + netif_rx_complete(dev, napi); uccf = ugeth->uccf; uccm = in_be32(uccf->p_uccm); uccm |= UCCE_RX_EVENTS; out_be32(uccf->p_uccm, uccm); } - return (rx_work_limit > 0) ? 
0 : 1; + return howmany; } #endif /* CONFIG_UGETH_NAPI */ @@ -3651,10 +3641,10 @@ static irqreturn_t ucc_geth_irq_handler( /* check for receive events that require processing */ if (ucce & UCCE_RX_EVENTS) { #ifdef CONFIG_UGETH_NAPI - if (netif_rx_schedule_prep(dev)) { - uccm &= ~UCCE_RX_EVENTS; + if (netif_rx_schedule_prep(dev, &ugeth->napi)) { + uccm &= ~UCCE_RX_EVENTS; out_be32(uccf->p_uccm, uccm); - __netif_rx_schedule(dev); + __netif_rx_schedule(dev, &ugeth->napi); } #else rx_mask = UCCE_RXBF_SINGLE_MASK; @@ -3717,12 +3707,15 @@ static int ucc_geth_open(struct net_devi return err; } +#ifdef CONFIG_UGETH_NAPI + napi_enable(&ugeth->napi); +#endif err = ucc_geth_startup(ugeth); if (err) { if (netif_msg_ifup(ugeth)) ugeth_err("%s: Cannot configure net device, aborting.", dev->name); - return err; + goto out_err; } err = adjust_enet_interface(ugeth); @@ -3730,7 +3723,7 @@ static int ucc_geth_open(struct net_devi if (netif_msg_ifup(ugeth)) ugeth_err("%s: Cannot configure net device, aborting.", dev->name); - return err; + goto out_err; } /* Set MACSTNADDR1, MACSTNADDR2 */ @@ -3748,7 +3741,7 @@ static int ucc_geth_open(struct net_devi if (err) { if (netif_msg_ifup(ugeth)) ugeth_err("%s: Cannot initialize PHY, aborting.", dev->name); - return err; + goto out_err; } phy_start(ugeth->phydev); @@ -3761,7 +3754,7 @@ static int ucc_geth_open(struct net_devi ugeth_err("%s: Cannot get IRQ for net device, aborting.", dev->name); ucc_geth_stop(ugeth); - return err; + goto out_err; } err = ugeth_enable(ugeth, COMM_DIR_RX_AND_TX); @@ -3769,12 +3762,18 @@ static int ucc_geth_open(struct net_devi if (netif_msg_ifup(ugeth)) ugeth_err("%s: Cannot enable net device, aborting.", dev->name); ucc_geth_stop(ugeth); - return err; + goto out_err; } netif_start_queue(dev); return err; + +out_err: +#ifdef CONFIG_UGETH_NAPI + napi_disable(&ugeth->napi); +#endif + return err; } /* Stops the kernel queue, and halts the controller */ @@ -3784,6 +3783,10 @@ static int ucc_geth_close(struct net_dev 
ugeth_vdbg("%s: IN", __FUNCTION__); +#ifdef CONFIG_UGETH_NAPI + napi_disable(&ugeth->napi); +#endif + ucc_geth_stop(ugeth); phy_disconnect(ugeth->phydev); @@ -3964,8 +3967,7 @@ static int ucc_geth_probe(struct of_devi dev->tx_timeout = ucc_geth_timeout; dev->watchdog_timeo = TX_TIMEOUT; #ifdef CONFIG_UGETH_NAPI - dev->poll = ucc_geth_poll; - dev->weight = UCC_GETH_DEV_WEIGHT; + netif_napi_add(dev, &ugeth->napi, ucc_geth_poll, UCC_GETH_DEV_WEIGHT); #endif /* CONFIG_UGETH_NAPI */ dev->stop = ucc_geth_close; dev->get_stats = ucc_geth_get_stats; diff -puN drivers/net/ucc_geth.h~git-net drivers/net/ucc_geth.h --- a/drivers/net/ucc_geth.h~git-net +++ a/drivers/net/ucc_geth.h @@ -1184,6 +1184,7 @@ struct ucc_geth_private { struct ucc_geth_info *ug_info; struct ucc_fast_private *uccf; struct net_device *dev; + struct napi_struct napi; struct net_device_stats stats; /* linux network statistics */ struct ucc_geth *ug_regs; struct ucc_geth_init_pram *p_init_enet_param_shadow; diff -puN /dev/null drivers/net/veth.c --- /dev/null +++ a/drivers/net/veth.c @@ -0,0 +1,474 @@ +/* + * drivers/net/veth.c + * + * Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc + * + * Author: Pavel Emelianov + * Ethtool interface from: Eric W. 
Biederman + * + */ + +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/ethtool.h> +#include <linux/etherdevice.h> + +#include <net/dst.h> +#include <net/xfrm.h> +#include <linux/veth.h> + +#define DRV_NAME "veth" +#define DRV_VERSION "1.0" + +struct veth_net_stats { + unsigned long rx_packets; + unsigned long tx_packets; + unsigned long rx_bytes; + unsigned long tx_bytes; + unsigned long tx_dropped; +}; + +struct veth_priv { + struct net_device *peer; + struct net_device *dev; + struct list_head list; + struct veth_net_stats *stats; + unsigned ip_summed; +}; + +static LIST_HEAD(veth_list); + +/* + * ethtool interface + */ + +static struct { + const char string[ETH_GSTRING_LEN]; +} ethtool_stats_keys[] = { + { "peer_ifindex" }, +}; + +static int veth_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + cmd->supported = 0; + cmd->advertising = 0; + cmd->speed = SPEED_10000; + cmd->duplex = DUPLEX_FULL; + cmd->port = PORT_TP; + cmd->phy_address = 0; + cmd->transceiver = XCVR_INTERNAL; + cmd->autoneg = AUTONEG_DISABLE; + cmd->maxtxpkt = 0; + cmd->maxrxpkt = 0; + return 0; +} + +static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) +{ + strcpy(info->driver, DRV_NAME); + strcpy(info->version, DRV_VERSION); + strcpy(info->fw_version, "N/A"); +} + +static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf) +{ + switch(stringset) { + case ETH_SS_STATS: + memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys)); + break; + } +} + +static int veth_get_stats_count(struct net_device *dev) +{ + return ARRAY_SIZE(ethtool_stats_keys); +} + +static void veth_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct veth_priv *priv; + + priv = netdev_priv(dev); + data[0] = priv->peer->ifindex; +} + +static u32 veth_get_rx_csum(struct net_device *dev) +{ + struct veth_priv *priv; + + priv = netdev_priv(dev); + return priv->ip_summed == CHECKSUM_UNNECESSARY; +} + +static int veth_set_rx_csum(struct net_device *dev, u32 data) +{ + struct veth_priv *priv; + + priv = 
netdev_priv(dev); + priv->ip_summed = data ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE; + return 0; +} + +static u32 veth_get_tx_csum(struct net_device *dev) +{ + return (dev->features & NETIF_F_NO_CSUM) != 0; +} + +static int veth_set_tx_csum(struct net_device *dev, u32 data) +{ + if (data) + dev->features |= NETIF_F_NO_CSUM; + else + dev->features &= ~NETIF_F_NO_CSUM; + return 0; +} + +static struct ethtool_ops veth_ethtool_ops = { + .get_settings = veth_get_settings, + .get_drvinfo = veth_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_rx_csum = veth_get_rx_csum, + .set_rx_csum = veth_set_rx_csum, + .get_tx_csum = veth_get_tx_csum, + .set_tx_csum = veth_set_tx_csum, + .get_sg = ethtool_op_get_sg, + .set_sg = ethtool_op_set_sg, + .get_strings = veth_get_strings, + .get_stats_count = veth_get_stats_count, + .get_ethtool_stats = veth_get_ethtool_stats, +}; + +/* + * xmit + */ + +static int veth_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct net_device *rcv = NULL; + struct veth_priv *priv, *rcv_priv; + struct veth_net_stats *stats; + int length, cpu; + + skb_orphan(skb); + + priv = netdev_priv(dev); + rcv = priv->peer; + rcv_priv = netdev_priv(rcv); + + cpu = smp_processor_id(); + stats = per_cpu_ptr(priv->stats, cpu); + + if (!(rcv->flags & IFF_UP)) + goto outf; + + skb->pkt_type = PACKET_HOST; + skb->protocol = eth_type_trans(skb, rcv); + if (dev->features & NETIF_F_NO_CSUM) + skb->ip_summed = rcv_priv->ip_summed; + + dst_release(skb->dst); + skb->dst = NULL; + skb->mark = 0; + secpath_reset(skb); + nf_reset(skb); + + length = skb->len; + + stats->tx_bytes += length; + stats->tx_packets++; + + stats = per_cpu_ptr(rcv_priv->stats, cpu); + stats->rx_bytes += length; + stats->rx_packets++; + + netif_rx(skb); + return 0; + +outf: + kfree_skb(skb); + stats->tx_dropped++; + return 0; +} + +/* + * general routines + */ + +static struct net_device_stats *veth_get_stats(struct net_device *dev) +{ + struct veth_priv *priv; + struct net_device_stats 
*dev_stats; + int cpu; + struct veth_net_stats *stats; + + priv = netdev_priv(dev); + dev_stats = &dev->stats; + + dev_stats->rx_packets = 0; + dev_stats->tx_packets = 0; + dev_stats->rx_bytes = 0; + dev_stats->tx_bytes = 0; + dev_stats->tx_dropped = 0; + + for_each_online_cpu(cpu) { + stats = per_cpu_ptr(priv->stats, cpu); + + dev_stats->rx_packets += stats->rx_packets; + dev_stats->tx_packets += stats->tx_packets; + dev_stats->rx_bytes += stats->rx_bytes; + dev_stats->tx_bytes += stats->tx_bytes; + dev_stats->tx_dropped += stats->tx_dropped; + } + + return dev_stats; +} + +static int veth_open(struct net_device *dev) +{ + struct veth_priv *priv; + + priv = netdev_priv(dev); + if (priv->peer == NULL) + return -ENOTCONN; + + if (priv->peer->flags & IFF_UP) { + netif_carrier_on(dev); + netif_carrier_on(priv->peer); + } + return 0; +} + +static int veth_close(struct net_device *dev) +{ + struct veth_priv *priv; + + if (netif_carrier_ok(dev)) { + priv = netdev_priv(dev); + netif_carrier_off(dev); + netif_carrier_off(priv->peer); + } + return 0; +} + +static int veth_dev_init(struct net_device *dev) +{ + struct veth_net_stats *stats; + struct veth_priv *priv; + + stats = alloc_percpu(struct veth_net_stats); + if (stats == NULL) + return -ENOMEM; + + priv = netdev_priv(dev); + priv->stats = stats; + return 0; +} + +static void veth_dev_free(struct net_device *dev) +{ + struct veth_priv *priv; + + priv = netdev_priv(dev); + free_percpu(priv->stats); + free_netdev(dev); +} + +static void veth_setup(struct net_device *dev) +{ + ether_setup(dev); + + dev->hard_start_xmit = veth_xmit; + dev->get_stats = veth_get_stats; + dev->open = veth_open; + dev->stop = veth_close; + dev->ethtool_ops = &veth_ethtool_ops; + dev->features |= NETIF_F_LLTX; + dev->init = veth_dev_init; + dev->destructor = veth_dev_free; + netif_carrier_off(dev); +} + +/* + * netlink interface + */ + +static int veth_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (tb[IFLA_ADDRESS]) { + if 
(nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + return 0; +} + +static struct rtnl_link_ops veth_link_ops; + +static int veth_newlink(struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + int err; + struct net_device *peer; + struct veth_priv *priv; + char ifname[IFNAMSIZ]; + struct nlattr *peer_tb[IFLA_MAX + 1], **tbp; + + /* + * create and register peer first + * + * struct ifinfomsg is at the head of VETH_INFO_PEER, but we + * skip it since no info from it is useful yet + */ + + if (data != NULL && data[VETH_INFO_PEER] != NULL) { + struct nlattr *nla_peer; + + nla_peer = data[VETH_INFO_PEER]; + err = nla_parse(peer_tb, IFLA_MAX, + nla_data(nla_peer) + sizeof(struct ifinfomsg), + nla_len(nla_peer) - sizeof(struct ifinfomsg), + ifla_policy); + if (err < 0) + return err; + + err = veth_validate(peer_tb, NULL); + if (err < 0) + return err; + + tbp = peer_tb; + } else + tbp = tb; + + if (tbp[IFLA_IFNAME]) + nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); + else + snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d"); + + peer = rtnl_create_link(ifname, &veth_link_ops, tbp); + if (IS_ERR(peer)) + return PTR_ERR(peer); + + if (tbp[IFLA_ADDRESS] == NULL) + random_ether_addr(peer->dev_addr); + + err = register_netdevice(peer); + if (err < 0) + goto err_register_peer; + + /* + * register dev last + * + * note, that since we've registered new device the dev's name + * should be re-allocated + */ + + if (tb[IFLA_ADDRESS] == NULL) + random_ether_addr(dev->dev_addr); + + if (tb[IFLA_IFNAME]) + nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); + else + snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d"); + + if (strchr(dev->name, '%')) { + err = dev_alloc_name(dev, dev->name); + if (err < 0) + goto err_alloc_name; + } + + err = register_netdevice(dev); + if (err < 0) + goto err_register_dev; + + /* + * tie the deviced together + */ + + priv = netdev_priv(dev); + priv->dev = 
dev; + priv->peer = peer; + list_add(&priv->list, &veth_list); + + priv = netdev_priv(peer); + priv->dev = peer; + priv->peer = dev; + INIT_LIST_HEAD(&priv->list); + return 0; + +err_register_dev: + /* nothing to do */ +err_alloc_name: + unregister_netdevice(peer); + return err; + +err_register_peer: + free_netdev(peer); + return err; +} + +static void veth_dellink(struct net_device *dev) +{ + struct veth_priv *priv; + struct net_device *peer; + + priv = netdev_priv(dev); + peer = priv->peer; + + if (!list_empty(&priv->list)) + list_del(&priv->list); + + priv = netdev_priv(peer); + if (!list_empty(&priv->list)) + list_del(&priv->list); + + unregister_netdevice(dev); + unregister_netdevice(peer); +} + +static const struct nla_policy veth_policy[VETH_INFO_MAX + 1]; + +static struct rtnl_link_ops veth_link_ops = { + .kind = DRV_NAME, + .priv_size = sizeof(struct veth_priv), + .setup = veth_setup, + .validate = veth_validate, + .newlink = veth_newlink, + .dellink = veth_dellink, + .policy = veth_policy, + .maxtype = VETH_INFO_MAX, +}; + +/* + * init/fini + */ + +static __init int veth_init(void) +{ + return rtnl_link_register(&veth_link_ops); +} + +static __exit void veth_exit(void) +{ + struct veth_priv *priv, *next; + + rtnl_lock(); + /* + * cannot trust __rtnl_link_unregister() to unregister all + * devices, as each ->dellink call will remove two devices + * from the list at once. 
+ */ + list_for_each_entry_safe(priv, next, &veth_list, list) + veth_dellink(priv->dev); + + __rtnl_link_unregister(&veth_link_ops); + rtnl_unlock(); +} + +module_init(veth_init); +module_exit(veth_exit); + +MODULE_DESCRIPTION("Virtual Ethernet Tunnel"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_RTNL_LINK(DRV_NAME); diff -puN drivers/net/via-rhine.c~git-net drivers/net/via-rhine.c --- a/drivers/net/via-rhine.c~git-net +++ a/drivers/net/via-rhine.c @@ -389,6 +389,8 @@ struct rhine_private { struct pci_dev *pdev; long pioaddr; + struct net_device *dev; + struct napi_struct napi; struct net_device_stats stats; spinlock_t lock; @@ -582,28 +584,25 @@ static void rhine_poll(struct net_device #endif #ifdef CONFIG_VIA_RHINE_NAPI -static int rhine_napipoll(struct net_device *dev, int *budget) +static int rhine_napipoll(struct napi_struct *napi, int budget) { - struct rhine_private *rp = netdev_priv(dev); + struct rhine_private *rp = container_of(napi, struct rhine_private, napi); + struct net_device *dev = rp->dev; void __iomem *ioaddr = rp->base; - int done, limit = min(dev->quota, *budget); + int work_done; - done = rhine_rx(dev, limit); - *budget -= done; - dev->quota -= done; + work_done = rhine_rx(dev, budget); - if (done < limit) { - netif_rx_complete(dev); + if (work_done < budget) { + netif_rx_complete(dev, napi); iowrite16(IntrRxDone | IntrRxErr | IntrRxEmpty| IntrRxOverflow | IntrRxDropped | IntrRxNoBuf | IntrTxAborted | IntrTxDone | IntrTxError | IntrTxUnderrun | IntrPCIErr | IntrStatsMax | IntrLinkChange, ioaddr + IntrEnable); - return 0; } - else - return 1; + return work_done; } #endif @@ -707,6 +706,7 @@ static int __devinit rhine_init_one(stru SET_NETDEV_DEV(dev, &pdev->dev); rp = netdev_priv(dev); + rp->dev = dev; rp->quirks = quirks; rp->pioaddr = pioaddr; rp->pdev = pdev; @@ -785,8 +785,7 @@ static int __devinit rhine_init_one(stru dev->poll_controller = rhine_poll; #endif #ifdef CONFIG_VIA_RHINE_NAPI - dev->poll = rhine_napipoll; - dev->weight = 64; + 
netif_napi_add(dev, &rp->napi, rhine_napipoll, 64); #endif if (rp->quirks & rqRhineI) dev->features |= NETIF_F_SG|NETIF_F_HW_CSUM; @@ -1061,7 +1060,9 @@ static void init_registers(struct net_de rhine_set_rx_mode(dev); - netif_poll_enable(dev); +#ifdef CONFIG_VIA_RHINE_NAPI + napi_enable(&rp->napi); +#endif /* Enable interrupts by setting the interrupt mask. */ iowrite16(IntrRxDone | IntrRxErr | IntrRxEmpty| IntrRxOverflow | @@ -1196,6 +1197,10 @@ static void rhine_tx_timeout(struct net_ /* protect against concurrent rx interrupts */ disable_irq(rp->pdev->irq); +#ifdef CONFIG_VIA_RHINE_NAPI + napi_disable(&rp->napi); +#endif + spin_lock(&rp->lock); /* clear all descriptors */ @@ -1324,7 +1329,7 @@ static irqreturn_t rhine_interrupt(int i IntrPCIErr | IntrStatsMax | IntrLinkChange, ioaddr + IntrEnable); - netif_rx_schedule(dev); + netif_rx_schedule(dev, &rp->napi); #else rhine_rx(dev, RX_RING_SIZE); #endif @@ -1837,7 +1842,9 @@ static int rhine_close(struct net_device spin_lock_irq(&rp->lock); netif_stop_queue(dev); - netif_poll_disable(dev); +#ifdef CONFIG_VIA_RHINE_NAPI + napi_disable(&rp->napi); +#endif if (debug > 1) printk(KERN_DEBUG "%s: Shutting down ethercard, " @@ -1936,6 +1943,9 @@ static int rhine_suspend(struct pci_dev if (!netif_running(dev)) return 0; +#ifdef CONFIG_VIA_RHINE_NAPI + napi_disable(&rp->napi); +#endif netif_device_detach(dev); pci_save_state(pdev); diff -puN drivers/net/wireless/rtl8187.h~git-net drivers/net/wireless/rtl8187.h --- a/drivers/net/wireless/rtl8187.h~git-net +++ a/drivers/net/wireless/rtl8187.h @@ -67,6 +67,7 @@ struct rtl8187_priv { struct rtl818x_csr *map; void (*rf_init)(struct ieee80211_hw *); int mode; + int if_id; /* rtl8187 specific */ struct ieee80211_channel channels[14]; diff -puN drivers/net/wireless/rtl8187_dev.c~git-net drivers/net/wireless/rtl8187_dev.c --- a/drivers/net/wireless/rtl8187_dev.c~git-net +++ a/drivers/net/wireless/rtl8187_dev.c @@ -96,7 +96,7 @@ static int rtl8187_tx(struct ieee80211_h if 
(control->flags & IEEE80211_TXCTL_USE_RTS_CTS) { tmp |= RTL8187_TX_FLAG_RTS; hdr->rts_duration = - ieee80211_rts_duration(dev, skb->len, control); + ieee80211_rts_duration(dev, priv->if_id, skb->len, control); } if (control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT) tmp |= RTL8187_TX_FLAG_CTS; @@ -510,6 +510,8 @@ static int rtl8187_config_interface(stru struct rtl8187_priv *priv = dev->priv; int i; + priv->if_id = if_id; + for (i = 0; i < ETH_ALEN; i++) rtl818x_iowrite8(priv, &priv->map->BSSID[i], conf->bssid[i]); @@ -604,8 +606,7 @@ static int __devinit rtl8187_probe(struc priv->mode = IEEE80211_IF_TYPE_MGMT; dev->flags = IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | IEEE80211_HW_RX_INCLUDES_FCS | - IEEE80211_HW_WEP_INCLUDE_IV | - IEEE80211_HW_DATA_NULLFUNC_ACK; + IEEE80211_HW_WEP_INCLUDE_IV; dev->extra_tx_headroom = sizeof(struct rtl8187_tx_hdr); dev->queues = 1; dev->max_rssi = 65; diff -puN drivers/net/xen-netfront.c~git-net drivers/net/xen-netfront.c --- a/drivers/net/xen-netfront.c~git-net +++ a/drivers/net/xen-netfront.c @@ -72,6 +72,7 @@ struct netfront_info { struct list_head list; struct net_device *netdev; + struct napi_struct napi; struct net_device_stats stats; struct xen_netif_tx_front_ring tx; @@ -185,7 +186,8 @@ static int xennet_can_sg(struct net_devi static void rx_refill_timeout(unsigned long data) { struct net_device *dev = (struct net_device *)data; - netif_rx_schedule(dev); + struct netfront_info *np = netdev_priv(dev); + netif_rx_schedule(dev, &np->napi); } static int netfront_tx_slot_available(struct netfront_info *np) @@ -309,12 +311,14 @@ static int xennet_open(struct net_device memset(&np->stats, 0, sizeof(np->stats)); + napi_enable(&np->napi); + spin_lock_bh(&np->rx_lock); if (netif_carrier_ok(dev)) { xennet_alloc_rx_buffers(dev); np->rx.sring->rsp_event = np->rx.rsp_cons + 1; if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) - netif_rx_schedule(dev); + netif_rx_schedule(dev, &np->napi); } spin_unlock_bh(&np->rx_lock); @@ -556,6 +560,7 @@ static 
int xennet_close(struct net_devic { struct netfront_info *np = netdev_priv(dev); netif_stop_queue(np->netdev); + napi_disable(&np->napi); return 0; } @@ -839,15 +844,16 @@ static int handle_incoming_queue(struct return packets_dropped; } -static int xennet_poll(struct net_device *dev, int *pbudget) +static int xennet_poll(struct napi_struct *napi, int budget) { - struct netfront_info *np = netdev_priv(dev); + struct netfront_info *np = container_of(napi, struct netfront_info, napi); + struct net_device *dev = np->netdev; struct sk_buff *skb; struct netfront_rx_info rinfo; struct xen_netif_rx_response *rx = &rinfo.rx; struct xen_netif_extra_info *extras = rinfo.extras; RING_IDX i, rp; - int work_done, budget, more_to_do = 1; + int work_done; struct sk_buff_head rxq; struct sk_buff_head errq; struct sk_buff_head tmpq; @@ -866,9 +872,6 @@ static int xennet_poll(struct net_device skb_queue_head_init(&errq); skb_queue_head_init(&tmpq); - budget = *pbudget; - if (budget > dev->quota) - budget = dev->quota; rp = np->rx.sring->rsp_prod; rmb(); /* Ensure we see queued responses up to 'rp'. 
*/ @@ -973,22 +976,21 @@ err: xennet_alloc_rx_buffers(dev); - *pbudget -= work_done; - dev->quota -= work_done; - if (work_done < budget) { + int more_to_do = 0; + local_irq_save(flags); RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do); if (!more_to_do) - __netif_rx_complete(dev); + __netif_rx_complete(dev, napi); local_irq_restore(flags); } spin_unlock(&np->rx_lock); - return more_to_do; + return work_done; } static int xennet_change_mtu(struct net_device *dev, int mtu) @@ -1168,10 +1170,9 @@ static struct net_device * __devinit xen netdev->hard_start_xmit = xennet_start_xmit; netdev->stop = xennet_close; netdev->get_stats = xennet_get_stats; - netdev->poll = xennet_poll; + netif_napi_add(netdev, &np->napi, xennet_poll, 64); netdev->uninit = xennet_uninit; netdev->change_mtu = xennet_change_mtu; - netdev->weight = 64; netdev->features = NETIF_F_IP_CSUM; SET_ETHTOOL_OPS(netdev, &xennet_ethtool_ops); @@ -1316,7 +1317,7 @@ static irqreturn_t xennet_interrupt(int xennet_tx_buf_gc(dev); /* Under tx_lock: protects access to rx shared-ring indexes. 
*/ if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) - netif_rx_schedule(dev); + netif_rx_schedule(dev, &np->napi); } spin_unlock_irqrestore(&np->tx_lock, flags); diff -puN include/linux/dccp.h~git-net include/linux/dccp.h --- a/include/linux/dccp.h~git-net +++ a/include/linux/dccp.h @@ -215,6 +215,7 @@ struct dccp_so_feat { #ifdef __KERNEL__ #include +#include #include #include #include @@ -498,7 +499,7 @@ struct dccp_sock { __u64 dccps_gar; __be32 dccps_service; struct dccp_service_list *dccps_service_list; - struct timeval dccps_timestamp_time; + ktime_t dccps_timestamp_time; __u32 dccps_timestamp_echo; __u16 dccps_l_ack_ratio; __u16 dccps_r_ack_ratio; @@ -511,7 +512,6 @@ struct dccp_sock { struct ccid *dccps_hc_rx_ccid; struct ccid *dccps_hc_tx_ccid; struct dccp_options_received dccps_options_received; - struct timeval dccps_epoch; enum dccp_role dccps_role:2; __u8 dccps_hc_rx_insert_options:1; __u8 dccps_hc_tx_insert_options:1; diff -puN include/linux/ethtool.h~git-net include/linux/ethtool.h --- a/include/linux/ethtool.h~git-net +++ a/include/linux/ethtool.h @@ -39,7 +39,8 @@ struct ethtool_drvinfo { char bus_info[ETHTOOL_BUSINFO_LEN]; /* Bus info for this IF. */ /* For PCI devices, use pci_name(pci_dev). */ char reserved1[32]; - char reserved2[16]; + char reserved2[12]; + __u32 n_priv_flags; /* number of flags valid in ETHTOOL_GPFLAGS */ __u32 n_stats; /* number of u64's from ETHTOOL_GSTATS */ __u32 testinfo_len; __u32 eedump_len; /* Size of data from ETHTOOL_GEEPROM (bytes) */ @@ -219,6 +220,7 @@ struct ethtool_pauseparam { enum ethtool_stringset { ETH_SS_TEST = 0, ETH_SS_STATS, + ETH_SS_PRIV_FLAGS, }; /* for passing string sets for data tagging */ @@ -256,6 +258,19 @@ struct ethtool_perm_addr { __u8 data[0]; }; +/* boolean flags controlling per-interface behavior characteristics. + * When reading, the flag indicates whether or not a certain behavior + * is enabled/present. 
When writing, the flag indicates whether + * or not the driver should turn on (set) or off (clear) a behavior. + * + * Some behaviors may be read-only (unconditionally absent or present). + * If such is the case, return EINVAL in the set-flags operation if the + * flag differs from the read-only value. + */ +enum ethtool_flags { + ETH_FLAG_LRO = (1 << 15), /* LRO is enabled */ +}; + #ifdef __KERNEL__ struct net_device; @@ -272,6 +287,8 @@ u32 ethtool_op_get_tso(struct net_device int ethtool_op_set_tso(struct net_device *dev, u32 data); u32 ethtool_op_get_ufo(struct net_device *dev); int ethtool_op_set_ufo(struct net_device *dev, u32 data); +u32 ethtool_op_get_flags(struct net_device *dev); +int ethtool_op_set_flags(struct net_device *dev, u32 data); /** * &ethtool_ops - Alter and report network device settings @@ -307,6 +324,8 @@ int ethtool_op_set_ufo(struct net_device * get_strings: Return a set of strings that describe the requested objects * phys_id: Identify the device * get_stats: Return statistics about the device + * get_flags: get 32-bit flags bitmap + * set_flags: set 32-bit flags bitmap * * Description: * @@ -359,16 +378,23 @@ struct ethtool_ops { int (*set_sg)(struct net_device *, u32); u32 (*get_tso)(struct net_device *); int (*set_tso)(struct net_device *, u32); - int (*self_test_count)(struct net_device *); void (*self_test)(struct net_device *, struct ethtool_test *, u64 *); void (*get_strings)(struct net_device *, u32 stringset, u8 *); int (*phys_id)(struct net_device *, u32); - int (*get_stats_count)(struct net_device *); void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *, u64 *); int (*begin)(struct net_device *); void (*complete)(struct net_device *); u32 (*get_ufo)(struct net_device *); int (*set_ufo)(struct net_device *, u32); + u32 (*get_flags)(struct net_device *); + int (*set_flags)(struct net_device *, u32); + u32 (*get_priv_flags)(struct net_device *); + int (*set_priv_flags)(struct net_device *, u32); + int 
(*get_sset_count)(struct net_device *, int); + + /* the following hooks are obsolete */ + int (*self_test_count)(struct net_device *);/* use get_sset_count */ + int (*get_stats_count)(struct net_device *);/* use get_sset_count */ }; #endif /* __KERNEL__ */ @@ -410,6 +436,10 @@ struct ethtool_ops { #define ETHTOOL_SUFO 0x00000022 /* Set UFO enable (ethtool_value) */ #define ETHTOOL_GGSO 0x00000023 /* Get GSO enable (ethtool_value) */ #define ETHTOOL_SGSO 0x00000024 /* Set GSO enable (ethtool_value) */ +#define ETHTOOL_GFLAGS 0x00000025 /* Get flags bitmap(ethtool_value) */ +#define ETHTOOL_SFLAGS 0x00000026 /* Set flags bitmap(ethtool_value) */ +#define ETHTOOL_GPFLAGS 0x00000027 /* Get driver-private flags bitmap */ +#define ETHTOOL_SPFLAGS 0x00000028 /* Set driver-private flags bitmap */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET diff -puN /dev/null include/linux/inet_lro.h --- /dev/null +++ a/include/linux/inet_lro.h @@ -0,0 +1,177 @@ +/* + * linux/include/linux/inet_lro.h + * + * Large Receive Offload (ipv4 / tcp) + * + * (C) Copyright IBM Corp. 2007 + * + * Authors: + * Jan-Bernd Themann + * Christoph Raisch + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef __INET_LRO_H_ +#define __INET_LRO_H_ + +#include +#include + +/* + * LRO statistics + */ + +struct net_lro_stats { + unsigned long aggregated; + unsigned long flushed; + unsigned long no_desc; +}; + +/* + * LRO descriptor for a tcp session + */ +struct net_lro_desc { + struct sk_buff *parent; + struct sk_buff *last_skb; + struct skb_frag_struct *next_frag; + struct iphdr *iph; + struct tcphdr *tcph; + struct vlan_group *vgrp; + __wsum data_csum; + u32 tcp_rcv_tsecr; + u32 tcp_rcv_tsval; + u32 tcp_ack; + u32 tcp_next_seq; + u32 skb_tot_frags_len; + u16 ip_tot_len; + u16 tcp_saw_tstamp; /* timestamps enabled */ + u16 tcp_window; + u16 vlan_tag; + int pkt_aggr_cnt; /* counts aggregated packets */ + int vlan_packet; + int mss; + int active; +}; + +/* + * Large Receive Offload (LRO) Manager + * + * Fields must be set by driver + */ + +struct net_lro_mgr { + struct net_device *dev; + struct net_lro_stats stats; + + /* LRO features */ + unsigned long features; +#define LRO_F_NAPI 1 /* Pass packets to stack via NAPI */ +#define LRO_F_EXTRACT_VLAN_ID 2 /* Set flag if VLAN IDs are extracted + from received packets and eth protocol + is still ETH_P_8021Q */ + + u32 ip_summed; /* Set in non generated SKBs in page mode */ + u32 ip_summed_aggr; /* Set in aggregated SKBs: CHECKSUM_UNNECESSARY + * or CHECKSUM_NONE */ + + int max_desc; /* Max number of LRO descriptors */ + int max_aggr; /* Max number of LRO packets to be aggregated */ + + struct net_lro_desc *lro_arr; /* Array of LRO descriptors */ + + /* + * Optimized driver functions + * + * get_skb_header: returns tcp and ip header for packet in SKB + */ + int (*get_skb_header)(struct sk_buff *skb, void **ip_hdr, + void **tcpudp_hdr, u64 *hdr_flags, void *priv); + + /* hdr_flags: */ +#define LRO_IPV4 1 /* ip_hdr is IPv4 header */ +#define LRO_TCP 2 /* tcpudp_hdr is TCP header */ + + /* + * get_frag_header: returns mac, tcp and ip header for packet in SKB + * + * @hdr_flags: Indicate what kind of LRO has to be 
done + * (IPv4/IPv6/TCP/UDP) + */ + int (*get_frag_header)(struct skb_frag_struct *frag, void **mac_hdr, + void **ip_hdr, void **tcpudp_hdr, u64 *hdr_flags, + void *priv); +}; + +/* + * Processes a SKB + * + * @lro_mgr: LRO manager to use + * @skb: SKB to aggregate + * @priv: Private data that may be used by driver functions + * (for example get_tcp_ip_hdr) + */ + +void lro_receive_skb(struct net_lro_mgr *lro_mgr, + struct sk_buff *skb, + void *priv); + +/* + * Processes a SKB with VLAN HW acceleration support + */ + +void lro_vlan_hwaccel_receive_skb(struct net_lro_mgr *lro_mgr, + struct sk_buff *skb, + struct vlan_group *vgrp, + u16 vlan_tag, + void *priv); + +/* + * Processes a fragment list + * + * This function aggregates fragments and generates SKBs to pass + * the packets to the stack. + * + * @lro_mgr: LRO manager to use + * @frags: Fragment to be processed. Must contain entire header in first + * element. + * @len: Length of received data + * @true_size: Actual size of memory the fragment is consuming + * @priv: Private data that may be used by driver functions + * (for example get_tcp_ip_hdr) + */ + +void lro_receive_frags(struct net_lro_mgr *lro_mgr, + struct skb_frag_struct *frags, + int len, int true_size, void *priv, __wsum sum); + +void lro_vlan_hwaccel_receive_frags(struct net_lro_mgr *lro_mgr, + struct skb_frag_struct *frags, + int len, int true_size, + struct vlan_group *vgrp, + u16 vlan_tag, + void *priv, __wsum sum); + +/* + * Forward all aggregated SKBs held by lro_mgr to network stack + */ + +void lro_flush_all(struct net_lro_mgr *lro_mgr); + +void lro_flush_pkt(struct net_lro_mgr *lro_mgr, + struct iphdr *iph, struct tcphdr *tcph); + +#endif diff -puN include/linux/ktime.h~git-net include/linux/ktime.h --- a/include/linux/ktime.h~git-net +++ a/include/linux/ktime.h @@ -109,6 +109,13 @@ static inline ktime_t ktime_set(const lo #define ktime_sub_ns(kt, nsval) \ ({ (ktime_t){ .tv64 = (kt).tv64 - (nsval) }; }) +/* + * Subtract a scalar nanosecond 
from a ktime_t variable + * res = kt - nsval: + */ +#define ktime_sub_ns(kt, nsval) \ + ({ (ktime_t){ .tv64 = (kt).tv64 - (nsval) }; }) + /* convert a timespec to ktime_t format: */ static inline ktime_t timespec_to_ktime(struct timespec ts) { @@ -216,6 +223,15 @@ extern ktime_t ktime_add_ns(const ktime_ extern ktime_t ktime_sub_ns(const ktime_t kt, u64 nsec); /** + * ktime_sub_ns - Subtract a scalar nanoseconds value from a ktime_t variable + * @kt: minuend + * @nsec: the scalar nsec value to subtract + * + * Returns the subtraction of @nsec from @kt in ktime_t format + */ +extern ktime_t ktime_sub_ns(const ktime_t kt, u64 nsec); + +/** * timespec_to_ktime - convert a timespec to ktime_t format * @ts: the timespec variable to convert * @@ -305,6 +321,11 @@ static inline ktime_t ktime_add_us(const return ktime_add_ns(kt, usec * 1000); } +static inline ktime_t ktime_sub_us(const ktime_t kt, const u64 usec) +{ + return ktime_sub_ns(kt, usec * 1000); +} + /* * The resolution of the clocks. 
The resolution value is returned in * the clock_getres() system call to give application programmers an diff -puN include/linux/netdevice.h~git-net include/linux/netdevice.h --- a/include/linux/netdevice.h~git-net +++ a/include/linux/netdevice.h @@ -31,6 +31,7 @@ #ifdef __KERNEL__ #include +#include #include #include #include @@ -38,6 +39,7 @@ #include #include #include +#include struct vlan_group; struct ethtool_ops; @@ -258,7 +260,6 @@ enum netdev_state_t __LINK_STATE_PRESENT, __LINK_STATE_SCHED, __LINK_STATE_NOCARRIER, - __LINK_STATE_RX_SCHED, __LINK_STATE_LINKWATCH_PENDING, __LINK_STATE_DORMANT, __LINK_STATE_QDISC_RUNNING, @@ -278,6 +279,96 @@ struct netdev_boot_setup { extern int __init netdev_boot_setup(char *str); /* + * Structure for NAPI scheduling similar to tasklet but with weighting + */ +struct napi_struct { + struct list_head poll_list; + unsigned long state; + int weight; + int quota; + int (*poll)(struct napi_struct *, int); +#ifdef CONFIG_NETPOLL + spinlock_t poll_lock; + int poll_owner; + struct net_device *dev; + struct list_head dev_list; +#endif +}; + +enum +{ + NAPI_STATE_SCHED, /* Poll is scheduled */ +}; + +extern void FASTCALL(__napi_schedule(struct napi_struct *n)); + +/** + * napi_schedule_prep - check if napi can be scheduled + * @n: napi context + * + * Test if NAPI routine is already running, and if not mark + * it as running. This is used as a condition variable + * insure only one NAPI poll instance runs + */ +static inline int napi_schedule_prep(struct napi_struct *n) +{ + return !test_and_set_bit(NAPI_STATE_SCHED, &n->state); +} + +/** + * napi_schedule - schedule NAPI poll + * @n: napi context + * + * Schedule NAPI poll routine to be called if it is not already + * running. + */ +static inline void napi_schedule(struct napi_struct *n) +{ + if (napi_schedule_prep(n)) + __napi_schedule(n); +} + +/** + * napi_complete - NAPI processing complete + * @n: napi context + * + * Mark NAPI processing as complete. 
+ */ +static inline void napi_complete(struct napi_struct *n) +{ + BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); + smp_mb__before_clear_bit(); + clear_bit(NAPI_STATE_SCHED, &n->state); +} + +/** + * napi_disable - prevent NAPI from scheduling + * @n: napi context + * + * Stop NAPI from being scheduled on this context. + * Waits till any outstanding processing completes. + */ +static inline void napi_disable(struct napi_struct *n) +{ + while (test_and_set_bit(NAPI_STATE_SCHED, &n->state)) + msleep_interruptible(1); +} + +/** + * napi_enable - enable NAPI scheduling + * @n: napi context + * + * Resume NAPI from being scheduled on this context. + * Must be paired with napi_disable. + */ +static inline void napi_enable(struct napi_struct *n) +{ + BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); + smp_mb__before_clear_bit(); + clear_bit(NAPI_STATE_SCHED, &n->state); +} + +/* * The DEVICE structure. * Actually, this whole structure is a big mistake. It mixes I/O * data with strictly "high-level" data, and it has to know about @@ -319,6 +410,9 @@ struct net_device unsigned long state; struct list_head dev_list; +#ifdef CONFIG_NETPOLL + struct list_head napi_list; +#endif /* The device initialization function. Called only once. */ int (*init)(struct net_device *dev); @@ -341,6 +435,7 @@ struct net_device #define NETIF_F_GSO 2048 /* Enable software GSO. 
*/ #define NETIF_F_LLTX 4096 /* LockLess TX */ #define NETIF_F_MULTI_QUEUE 16384 /* Has multiple TX/RX queues */ +#define NETIF_F_LRO 32768 /* large receive offload */ /* Segmentation offload features */ #define NETIF_F_GSO_SHIFT 16 @@ -430,12 +525,6 @@ struct net_device /* * Cache line mostly used on receive path (including eth_type_trans()) */ - struct list_head poll_list ____cacheline_aligned_in_smp; - /* Link to poll list */ - - int (*poll) (struct net_device *dev, int *quota); - int quota; - int weight; unsigned long last_rx; /* Time of last Rx */ /* Interface address info used in eth_type_trans() */ unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address, (before bcast @@ -582,6 +671,12 @@ struct net_device #define NETDEV_ALIGN 32 #define NETDEV_ALIGN_CONST (NETDEV_ALIGN - 1) +/** + * netdev_priv - access network device private data + * @dev: network device + * + * Get network device private data + */ static inline void *netdev_priv(const struct net_device *dev) { return dev->priv; @@ -593,6 +688,23 @@ static inline void *netdev_priv(const st */ #define SET_NETDEV_DEV(net, pdev) ((net)->dev.parent = (pdev)) +static inline void netif_napi_add(struct net_device *dev, + struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), + int weight) +{ + INIT_LIST_HEAD(&napi->poll_list); + napi->poll = poll; + napi->weight = weight; +#ifdef CONFIG_NETPOLL + napi->dev = dev; + list_add(&napi->dev_list, &dev->napi_list); + spin_lock_init(&napi->poll_lock); + napi->poll_owner = -1; +#endif + set_bit(NAPI_STATE_SCHED, &napi->state); +} + struct packet_type { __be16 type; /* This is really htons(ether_type). */ struct net_device *dev; /* NULL is wildcarded here */ @@ -678,7 +790,6 @@ static inline int unregister_gifconf(uns * Incoming packets are placed on per-cpu queues so that * no locking is needed. 
*/ - struct softnet_data { struct net_device *output_queue; @@ -686,7 +797,7 @@ struct softnet_data struct list_head poll_list; struct sk_buff *completion_queue; - struct net_device backlog_dev; /* Sorry. 8) */ + struct napi_struct backlog; #ifdef CONFIG_NET_DMA struct dma_chan *net_dma; #endif @@ -704,11 +815,24 @@ static inline void netif_schedule(struct __netif_schedule(dev); } +/** + * netif_start_queue - allow transmit + * @dev: network device + * + * Allow upper layers to call the device hard_start_xmit routine. + */ static inline void netif_start_queue(struct net_device *dev) { clear_bit(__LINK_STATE_XOFF, &dev->state); } +/** + * netif_wake_queue - restart transmit + * @dev: network device + * + * Allow upper layers to call the device hard_start_xmit routine. + * Used for flow control when transmit resources are available. + */ static inline void netif_wake_queue(struct net_device *dev) { #ifdef CONFIG_NETPOLL_TRAP @@ -721,16 +845,35 @@ static inline void netif_wake_queue(stru __netif_schedule(dev); } +/** + * netif_stop_queue - stop transmitted packets + * @dev: network device + * + * Stop upper layers calling the device hard_start_xmit routine. + * Used for flow control when transmit resources are unavailable. + */ static inline void netif_stop_queue(struct net_device *dev) { set_bit(__LINK_STATE_XOFF, &dev->state); } +/** + * netif_queue_stopped - test if transmit queue is flowblocked + * @dev: network device + * + * Test if transmit queue on device is currently unable to send. + */ static inline int netif_queue_stopped(const struct net_device *dev) { return test_bit(__LINK_STATE_XOFF, &dev->state); } +/** + * netif_running - test if up + * @dev: network device + * + * Test if the device has been brought up. + */ static inline int netif_running(const struct net_device *dev) { return test_bit(__LINK_STATE_START, &dev->state); @@ -742,6 +885,14 @@ static inline int netif_running(const st * done at the overall netdevice level. 
* Also test the device if we're multiqueue. */ + +/** + * netif_start_subqueue - allow sending packets on subqueue + * @dev: network device + * @queue_index: sub queue index + * + * Start individual transmit queue of a device with multiple transmit queues. + */ static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index) { #ifdef CONFIG_NETDEVICES_MULTIQUEUE @@ -749,6 +900,13 @@ static inline void netif_start_subqueue( #endif } +/** + * netif_stop_subqueue - stop sending packets on subqueue + * @dev: network device + * @queue_index: sub queue index + * + * Stop individual transmit queue of a device with multiple transmit queues. + */ static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) { #ifdef CONFIG_NETDEVICES_MULTIQUEUE @@ -760,6 +918,13 @@ static inline void netif_stop_subqueue(s #endif } +/** + * netif_subqueue_stopped - test status of subqueue + * @dev: network device + * @queue_index: sub queue index + * + * Check individual transmit queue of a device with multiple transmit queues. + */ static inline int netif_subqueue_stopped(const struct net_device *dev, u16 queue_index) { @@ -771,6 +936,14 @@ static inline int netif_subqueue_stopped #endif } + +/** + * netif_wake_subqueue - allow sending packets on subqueue + * @dev: network device + * @queue_index: sub queue index + * + * Resume individual transmit queue of a device with multiple transmit queues. 
+ */ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) { #ifdef CONFIG_NETDEVICES_MULTIQUEUE @@ -784,6 +957,13 @@ static inline void netif_wake_subqueue(s #endif } +/** + * netif_is_multiqueue - test if device has multiple transmit queues + * @dev: network device + * + * Check if device has multiple transmit queues + * Always false if CONFIG_NETDEVICES_MULTIQUEUE is not configured + */ static inline int netif_is_multiqueue(const struct net_device *dev) { #ifdef CONFIG_NETDEVICES_MULTIQUEUE @@ -796,20 +976,7 @@ static inline int netif_is_multiqueue(co /* Use this variant when it is known for sure that it * is executing from interrupt context. */ -static inline void dev_kfree_skb_irq(struct sk_buff *skb) -{ - if (atomic_dec_and_test(&skb->users)) { - struct softnet_data *sd; - unsigned long flags; - - local_irq_save(flags); - sd = &__get_cpu_var(softnet_data); - skb->next = sd->completion_queue; - sd->completion_queue = skb; - raise_softirq_irqoff(NET_TX_SOFTIRQ); - local_irq_restore(flags); - } -} +extern void dev_kfree_skb_irq(struct sk_buff *skb); /* Use this variant in places where it could be invoked * either from interrupt or non-interrupt context. @@ -833,18 +1000,28 @@ extern int dev_set_mac_address(struct n extern int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev); -extern void dev_init(void); - extern int netdev_budget; /* Called by rtnetlink.c:rtnl_unlock() */ extern void netdev_run_todo(void); +/** + * dev_put - release reference to device + * @dev: network device + * + * Release reference to device to allow it to be freed. + */ static inline void dev_put(struct net_device *dev) { atomic_dec(&dev->refcnt); } +/** + * dev_hold - get reference to device + * @dev: network device + * + * Hold reference to device to keep it from being freed. 
+ */ static inline void dev_hold(struct net_device *dev) { atomic_inc(&dev->refcnt); @@ -861,6 +1038,12 @@ static inline void dev_hold(struct net_d extern void linkwatch_fire_event(struct net_device *dev); +/** + * netif_carrier_ok - test if carrier present + * @dev: network device + * + * Check if carrier is present on device + */ static inline int netif_carrier_ok(const struct net_device *dev) { return !test_bit(__LINK_STATE_NOCARRIER, &dev->state); @@ -872,30 +1055,66 @@ extern void netif_carrier_on(struct net_ extern void netif_carrier_off(struct net_device *dev); +/** + * netif_dormant_on - mark device as dormant. + * @dev: network device + * + * Mark device as dormant (as per RFC2863). + * + * The dormant state indicates that the relevant interface is not + * actually in a condition to pass packets (i.e., it is not 'up') but is + * in a "pending" state, waiting for some external event. For "on- + * demand" interfaces, this new state identifies the situation where the + * interface is waiting for events to place it in the up state. + * + */ static inline void netif_dormant_on(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_DORMANT, &dev->state)) linkwatch_fire_event(dev); } +/** + * netif_dormant_off - set device as not dormant. + * @dev: network device + * + * Device is not in dormant state. 
+ */ static inline void netif_dormant_off(struct net_device *dev) { if (test_and_clear_bit(__LINK_STATE_DORMANT, &dev->state)) linkwatch_fire_event(dev); } +/** + * netif_dormant - test if device is dormant + * @dev: network device + * + * Check if device is in the dormant state + */ static inline int netif_dormant(const struct net_device *dev) { return test_bit(__LINK_STATE_DORMANT, &dev->state); } +/** + * netif_oper_up - test if device is operational + * @dev: network device + * + * Check if device is operational + */ static inline int netif_oper_up(const struct net_device *dev) { return (dev->operstate == IF_OPER_UP || dev->operstate == IF_OPER_UNKNOWN /* backward compat */); } -/* Hot-plugging. */ +/** + * netif_device_present - is device available or removed + * @dev: network device + * + * Check if device has not been removed from system. + */ static inline int netif_device_present(struct net_device *dev) { return test_bit(__LINK_STATE_PRESENT, &dev->state); @@ -955,60 +1174,58 @@ static inline u32 netif_msg_init(int deb return (1 << debug_value) - 1; } -/* Test if receive needs to be scheduled */ -static inline int __netif_rx_schedule_prep(struct net_device *dev) +/* Try to reschedule poll. Called by dev->poll() after netif_rx_complete(). + * Do not inline this?
+ */ +static inline int netif_rx_reschedule(struct napi_struct *n) { - return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); + if (napi_schedule_prep(n)) { + unsigned long flags; + + local_irq_save(flags); + list_add_tail(&n->poll_list, + &__get_cpu_var(softnet_data).poll_list); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); + local_irq_restore(flags); + return 1; + } + return 0; } /* Test if receive needs to be scheduled but only if up */ -static inline int netif_rx_schedule_prep(struct net_device *dev) +static inline int netif_rx_schedule_prep(struct net_device *dev, + struct napi_struct *napi) { - return netif_running(dev) && __netif_rx_schedule_prep(dev); + return netif_running(dev) && napi_schedule_prep(napi); } /* Add interface to tail of rx poll list. This assumes that _prep has * already been called and returned 1. */ - -extern void __netif_rx_schedule(struct net_device *dev); - -/* Try to reschedule poll. Called by irq handler. */ - -static inline void netif_rx_schedule(struct net_device *dev) +static inline void __netif_rx_schedule(struct net_device *dev, + struct napi_struct *napi) { - if (netif_rx_schedule_prep(dev)) - __netif_rx_schedule(dev); + dev_hold(dev); + __napi_schedule(napi); } -/* Try to reschedule poll. Called by dev->poll() after netif_rx_complete(). - * Do not inline this? - */ -static inline int netif_rx_reschedule(struct net_device *dev, int undo) -{ - if (netif_rx_schedule_prep(dev)) { - unsigned long flags; - - dev->quota += undo; +/* Try to reschedule poll. Called by irq handler. 
*/ - local_irq_save(flags); - list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list); - __raise_softirq_irqoff(NET_RX_SOFTIRQ); - local_irq_restore(flags); - return 1; - } - return 0; +static inline void netif_rx_schedule(struct net_device *dev, + struct napi_struct *napi) +{ + if (netif_rx_schedule_prep(dev, napi)) + __netif_rx_schedule(dev, napi); } /* same as netif_rx_complete, except that local_irq_save(flags) * has already been issued */ -static inline void __netif_rx_complete(struct net_device *dev) +static inline void __netif_rx_complete(struct net_device *dev, + struct napi_struct *napi) { - BUG_ON(!test_bit(__LINK_STATE_RX_SCHED, &dev->state)); - list_del(&dev->poll_list); - smp_mb__before_clear_bit(); - clear_bit(__LINK_STATE_RX_SCHED, &dev->state); + napi_complete(napi); + dev_put(dev); } /* Remove interface from poll list: it must be in the poll list @@ -1016,28 +1233,22 @@ static inline void __netif_rx_complete(s * it completes the work. The device cannot be out of poll list at this * moment, it is BUG(). */ -static inline void netif_rx_complete(struct net_device *dev) +static inline void netif_rx_complete(struct net_device *dev, + struct napi_struct *napi) { unsigned long flags; local_irq_save(flags); - __netif_rx_complete(dev); + __netif_rx_complete(dev, napi); local_irq_restore(flags); } -static inline void netif_poll_disable(struct net_device *dev) -{ - while (test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state)) - /* No hurry. 
*/ - schedule_timeout_interruptible(1); -} - -static inline void netif_poll_enable(struct net_device *dev) -{ - smp_mb__before_clear_bit(); - clear_bit(__LINK_STATE_RX_SCHED, &dev->state); -} - +/** + * netif_tx_lock - grab network device transmit lock + * @dev: network device + * + * Get network device transmit lock + */ static inline void netif_tx_lock(struct net_device *dev) { spin_lock(&dev->_xmit_lock); diff -puN include/linux/netpoll.h~git-net include/linux/netpoll.h --- a/include/linux/netpoll.h~git-net +++ a/include/linux/netpoll.h @@ -25,8 +25,6 @@ struct netpoll { struct netpoll_info { atomic_t refcnt; - spinlock_t poll_lock; - int poll_owner; int rx_flags; spinlock_t rx_lock; struct netpoll *rx_np; /* netpoll that registered an rx_hook */ @@ -37,6 +35,7 @@ struct netpoll_info { void netpoll_poll(struct netpoll *np); void netpoll_send_udp(struct netpoll *np, const char *msg, int len); +void netpoll_print_options(struct netpoll *np); int netpoll_parse_options(struct netpoll *np, char *opt); int netpoll_setup(struct netpoll *np); int netpoll_trap(void); @@ -64,32 +63,61 @@ static inline int netpoll_rx(struct sk_b return ret; } -static inline void *netpoll_poll_lock(struct net_device *dev) +static inline int netpoll_receive_skb(struct sk_buff *skb) { + if (!list_empty(&skb->dev->napi_list)) + return netpoll_rx(skb); + return 0; +} + +static inline void *netpoll_poll_lock(struct napi_struct *napi) +{ + struct net_device *dev = napi->dev; + rcu_read_lock(); /* deal with race on ->npinfo */ - if (dev->npinfo) { - spin_lock(&dev->npinfo->poll_lock); - dev->npinfo->poll_owner = smp_processor_id(); - return dev->npinfo; + if (dev && dev->npinfo) { + spin_lock(&napi->poll_lock); + napi->poll_owner = smp_processor_id(); + return napi; } return NULL; } static inline void netpoll_poll_unlock(void *have) { - struct netpoll_info *npi = have; + struct napi_struct *napi = have; - if (npi) { - npi->poll_owner = -1; - spin_unlock(&npi->poll_lock); + if (napi) { + 
napi->poll_owner = -1; + spin_unlock(&napi->poll_lock); } rcu_read_unlock(); } +static inline void netpoll_netdev_init(struct net_device *dev) +{ + INIT_LIST_HEAD(&dev->napi_list); +} + #else -#define netpoll_rx(a) 0 -#define netpoll_poll_lock(a) NULL -#define netpoll_poll_unlock(a) +static inline int netpoll_rx(struct sk_buff *skb) +{ + return 0; +} +static inline int netpoll_receive_skb(struct sk_buff *skb) +{ + return 0; +} +static inline void *netpoll_poll_lock(struct napi_struct *napi) +{ + return NULL; +} +static inline void netpoll_poll_unlock(void *have) +{ +} +static inline void netpoll_netdev_init(struct net_device *dev) +{ +} #endif #endif diff -puN include/linux/snmp.h~git-net include/linux/snmp.h --- a/include/linux/snmp.h~git-net +++ a/include/linux/snmp.h @@ -231,6 +231,9 @@ enum LINUX_MIB_TCPABORTONLINGER, /* TCPAbortOnLinger */ LINUX_MIB_TCPABORTFAILED, /* TCPAbortFailed */ LINUX_MIB_TCPMEMORYPRESSURES, /* TCPMemoryPressures */ + LINUX_MIB_TCPSACKDISCARD, /* TCPSACKDiscard */ + LINUX_MIB_TCPDSACKIGNOREDOLD, /* TCPDSACKIgnoredOld */ + LINUX_MIB_TCPDSACKIGNOREDNOUNDO, /* TCPDSACKIgnoredNoUndo */ __LINUX_MIB_MAX }; diff -puN include/linux/tcp.h~git-net include/linux/tcp.h --- a/include/linux/tcp.h~git-net +++ a/include/linux/tcp.h @@ -304,7 +304,6 @@ struct tcp_sock { u32 rtt_seq; /* sequence number to update rttvar */ u32 packets_out; /* Packets which are "in flight" */ - u32 left_out; /* Packets which leaved network */ u32 retrans_out; /* Retransmitted packets out */ /* * Options received (usually on last packet, some only on SYN packets).
@@ -333,6 +332,9 @@ struct tcp_sock { struct tcp_sack_block_wire recv_sack_cache[4]; + u32 highest_sack; /* Start seq of globally highest revd SACK + * (validity guaranteed only if sacked_out > 0) */ + /* from STCP, retrans queue hinting */ struct sk_buff* lost_skb_hint; @@ -344,7 +346,6 @@ struct tcp_sock { int fastpath_cnt_hint; int lost_cnt_hint; int retransmit_cnt_hint; - int forward_cnt_hint; u16 advmss; /* Advertised MSS */ u16 prior_ssthresh; /* ssthresh saved at recovery start */ diff -puN include/net/ieee80211_radiotap.h~git-net include/net/ieee80211_radiotap.h --- a/include/net/ieee80211_radiotap.h~git-net +++ a/include/net/ieee80211_radiotap.h @@ -40,6 +40,7 @@ #include #include +#include /* Radiotap header version (from official NetBSD feed) */ #define IEEE80211RADIOTAP_VERSION "1.5" @@ -255,4 +256,13 @@ enum ieee80211_radiotap_type { (((x) == 14) ? 2484 : ((x) * 5) + 2407) : \ ((x) + 1000) * 5) +/* helpers */ +static inline int ieee80211_get_radiotap_len(unsigned char *data) +{ + struct ieee80211_radiotap_header *hdr = + (struct ieee80211_radiotap_header *)data; + + return le16_to_cpu(get_unaligned(&hdr->it_len)); +} + #endif /* IEEE80211_RADIOTAP_H */ diff -puN include/net/ip6_fib.h~git-net include/net/ip6_fib.h --- a/include/net/ip6_fib.h~git-net +++ a/include/net/ip6_fib.h @@ -105,6 +105,8 @@ struct rt6_info struct rt6key rt6i_src; u8 rt6i_protocol; + + u32 rt6i_flow_cache_genid; }; static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) diff -puN include/net/ipv6.h~git-net include/net/ipv6.h --- a/include/net/ipv6.h~git-net +++ a/include/net/ipv6.h @@ -377,6 +377,12 @@ static inline int ipv6_addr_any(const st a->s6_addr32[2] | a->s6_addr32[3] ) == 0); } +static inline int ipv6_addr_v4mapped(const struct in6_addr *a) +{ + return ((a->s6_addr32[0] | a->s6_addr32[1]) == 0 && + a->s6_addr32[2] == htonl(0x0000ffff)); +} + /* * find the first different bit between two addresses * length of address must be a multiple of 32bits diff -puN 
include/net/mac80211.h~git-net include/net/mac80211.h --- a/include/net/mac80211.h~git-net +++ a/include/net/mac80211.h @@ -1,7 +1,9 @@ /* - * Low-level hardware driver -- IEEE 802.11 driver (80211.o) interface + * mac80211 <-> driver interface + * * Copyright 2002-2005, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc + * Copyright 2007 Johannes Berg * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -192,13 +194,19 @@ struct ieee80211_tx_control { #define IEEE80211_TXCTL_FIRST_FRAGMENT (1<<8) /* this is a first fragment of * the frame */ #define IEEE80211_TXCTL_TKIP_NEW_PHASE1_KEY (1<<9) +#define IEEE80211_TXCTL_LONG_RETRY_LIMIT (1<<10) /* this frame should be sent + * using the long retry + * value configured via + * set_retry_limit */ u32 flags; /* tx control flags defined * above */ - u8 retry_limit; /* 1 = only first attempt, 2 = one retry, .. */ + u8 retry_limit; /* 1 = only first attempt, 2 = one retry, ..
+ * This could be used when set_retry_limit + * is not implemented by the driver */ u8 power_level; /* per-packet transmit power level, in dBm */ u8 antenna_sel_tx; /* 0 = default/diversity, 1 = Ant0, 2 = Ant1 */ - s8 key_idx; /* -1 = do not encrypt, >= 0 keyidx from - * hw->set_key() */ + s8 key_idx; /* HW_KEY_IDX_INVALID = do not encrypt, + * other values: keyidx from hw->set_key() */ u8 icv_len; /* length of the ICV/MIC field in octets */ u8 iv_len; /* length of the IV field in octets */ u8 tkip_key[16]; /* generated phase2/phase1 key for hw TKIP */ @@ -288,28 +296,10 @@ struct ieee80211_conf { u8 power_level; /* transmit power limit for current * regulatory domain; in dBm */ u8 antenna_max; /* maximum antenna gain */ - short tx_power_reduction; /* in 0.1 dBm */ /* 0 = default/diversity, 1 = Ant0, 2 = Ant1 */ u8 antenna_sel_tx; u8 antenna_sel_rx; - - int antenna_def; - int antenna_mode; - - /* Following five fields are used for IEEE 802.11H */ - unsigned int radar_detect; - unsigned int spect_mgmt; - /* All following fields are currently unused. */ - unsigned int quiet_duration; /* duration of quiet period */ - unsigned int quiet_offset; /* how far into the beacon is the quiet - * period */ - unsigned int quiet_period; - u8 radar_firpwr_threshold; - u8 radar_rssi_threshold; - u8 pulse_height_threshold; - u8 pulse_rssi_threshold; - u8 pulse_inband_threshold; }; /** @@ -402,29 +392,41 @@ struct ieee80211_if_conf { struct ieee80211_tx_control *beacon_control; }; -typedef enum { ALG_NONE, ALG_WEP, ALG_TKIP, ALG_CCMP, ALG_NULL } -ieee80211_key_alg; +typedef enum { + ALG_NONE, + ALG_WEP, + ALG_TKIP, + ALG_CCMP, +} ieee80211_key_alg; +/* + * This flag indicates that the station this key is being + * configured for may use QoS. If your hardware cannot handle + * that situation it should reject that key.
+ */ +#define IEEE80211_KEY_FLAG_WMM_STA (1<<0) struct ieee80211_key_conf { + /* + * To be set by the driver to the key index it would like to + * get in the ieee80211_tx_control.key_idx which defaults + * to HW_KEY_IDX_INVALID so that shouldn't be used. + */ + int hw_key_idx; - int hw_key_idx; /* filled + used by low-level driver */ + /* key algorithm, ALG_NONE should never be seen by the driver */ ieee80211_key_alg alg; - int keylen; -#define IEEE80211_KEY_FORCE_SW_ENCRYPT (1<<0) /* to be cleared by low-level - driver */ -#define IEEE80211_KEY_DEFAULT_TX_KEY (1<<1) /* This key is the new default TX - key (used only for broadcast - keys). */ -#define IEEE80211_KEY_DEFAULT_WEP_ONLY (1<<2) /* static WEP is the only - configured security policy; - this allows some low-level - drivers to determine when - hwaccel can be used */ - u32 flags; /* key configuration flags defined above */ + /* key flags, see above */ + u8 flags; + + /* key index: 0-3 */ + s8 keyidx; + + /* length of key material */ + u8 keylen; - s8 keyidx; /* WEP key index */ + /* the key material */ u8 key[0]; }; @@ -432,7 +434,7 @@ struct ieee80211_key_conf { #define IEEE80211_SEQ_COUNTER_TX 1 typedef enum { - SET_KEY, DISABLE_KEY, REMOVE_ALL_KEYS, + SET_KEY, DISABLE_KEY, } set_key_cmd; /* This is driver-visible part of the per-hw state the stack keeps. */ @@ -457,20 +459,25 @@ struct ieee80211_hw { /* TODO: frame_type 802.11/802.3, sw_encryption requirements */ - /* Some wireless LAN chipsets generate beacons in the hardware/firmware - * and others rely on host generated beacons. This option is used to - * configure the upper layer IEEE 802.11 module to generate beacons. - * The low-level driver can use ieee80211_beacon_get() to fetch the - * next beacon frame. */ -#define IEEE80211_HW_HOST_GEN_BEACON (1<<0) +/* hole at 0 */ - /* The device needs to be supplied with a beacon template only. */ + /* + * The device only needs to be supplied with a beacon template. 
+ * If you need the host to generate each beacon then don't use + * this flag and use ieee80211_beacon_get(). + */ #define IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE (1<<1) - /* Some devices handle decryption internally and do not + /* + * Some devices handle decryption internally and do not * indicate whether the frame was encrypted (unencrypted frames * will be dropped by the hardware, unless specifically allowed - * through) */ + * through.) + * It is permissible to not handle all encrypted frames and fall + * back to software encryption; however, if this flag is set + * unencrypted frames must be dropped unless the driver is told + * otherwise via the set_ieee8021x() callback. + */ #define IEEE80211_HW_DEVICE_HIDES_WEP (1<<2) /* Whether RX frames passed to ieee80211_rx() include FCS in the end */ @@ -484,19 +491,30 @@ struct ieee80211_hw { * can fetch them with ieee80211_get_buffered_bc(). */ #define IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING (1<<4) + /* + * This flag is only relevant if hardware encryption is used. + * If set, it has two meanings: + * 1) the IV and ICV are present in received frames that have + * been decrypted (unless IEEE80211_HW_DEVICE_HIDES_WEP is + * also set) + * 2) on transmission, the IV should be generated in software. + * + * Please let us know if you *don't* use this flag, the stack would + * really like to be able to get the IV to keep key statistics + * accurate. + */ #define IEEE80211_HW_WEP_INCLUDE_IV (1<<5) - /* will data nullfunc frames get proper TX status callback */ -#define IEEE80211_HW_DATA_NULLFUNC_ACK (1<<6) +/* hole at 6 */ - /* Force software encryption for TKIP packets if WMM is enabled. */ -#define IEEE80211_HW_NO_TKIP_WMM_HWACCEL (1<<7) +/* hole at 7 */ - /* Some devices handle Michael MIC internally and do not include MIC in - * the received packets passed up. device_strips_mic must be set - * for such devices. 
The 'encryption' frame control bit is expected to - * be still set in the IEEE 802.11 header with this option unlike with - * the device_hides_wep configuration option. + /* + * Some devices handle Michael MIC internally and do not include MIC in + * the received packets passed up. This flag must be set for such + * devices. The 'encryption' frame control bit is expected to be still + * set in the IEEE 802.11 header with this option unlike with the + * IEEE80211_HW_DEVICE_HIDES_WEP flag. */ #define IEEE80211_HW_DEVICE_STRIPS_MIC (1<<8) @@ -562,9 +580,6 @@ struct ieee80211_ops { int (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb, struct ieee80211_tx_control *control); - /* Handler for performing hardware reset. */ - int (*reset)(struct ieee80211_hw *hw); - /* Handler that is called when any netdevice attached to the hardware * device is set UP for the first time. This can be used, e.g., to * enable interrupts and beacon sending. */ @@ -621,21 +636,48 @@ struct ieee80211_ops { * Must be atomic. */ int (*set_tim)(struct ieee80211_hw *hw, int aid, int set); - /* Set encryption key. IEEE 802.11 module calls this function to set - * encryption keys. addr is ff:ff:ff:ff:ff:ff for default keys and - * station hwaddr for individual keys. aid of the station is given - * to help low-level driver in selecting which key->hw_key_idx to use - * for this key. TX control data will use the hw_key_idx selected by - * the low-level driver. - * Must be atomic. */ + /* + * Set encryption key. + * + * This is called to enable hardware acceleration of encryption and + * decryption. The address will be the broadcast address for default + * keys, the other station's hardware address for individual keys or + * the zero address for keys that will be used only for transmission. + * + * The local_address parameter will always be set to our own address, + * this is only relevant if you support multiple local addresses. 
+ * + * When transmitting, the TX control data will use the hw_key_idx + * selected by the low-level driver. + * + * Return 0 if the key is now in use, -EOPNOTSUPP or -ENOSPC if it + * couldn't be added; if you return 0 then hw_key_idx must be + * assigned to something other than HW_KEY_IDX_INVALID. When the cmd + * is DISABLE_KEY then it must succeed. + * + * This callback can sleep, and is only called between add_interface + * and remove_interface calls, i.e. while the interface with the + * given local_address is enabled. + * + * The ieee80211_key_conf structure pointed to by the key parameter + * is guaranteed to be valid until another call to set_key removes + * it, but it can only be used as a cookie to differentiate keys. + */ int (*set_key)(struct ieee80211_hw *hw, set_key_cmd cmd, - u8 *addr, struct ieee80211_key_conf *key, int aid); + const u8 *local_address, const u8 *address, + struct ieee80211_key_conf *key); - /* Set TX key index for default/broadcast keys. This is needed in cases + /* + * Set TX key index for default/broadcast keys. This is needed in cases * where wlan card is doing full WEP/TKIP encapsulation (wep_include_iv * is not set), in other cases, this function pointer can be set to - * NULL since the IEEE 802. 11 module takes care of selecting the key - * index for each TX frame. */ + * NULL since the IEEE 802.11 module takes care of selecting the key + * index for each TX frame. + * + * TODO: If you use this callback in your driver tell us if you need + * any other information from it to make it easier, like the + * key_conf instead. + */ int (*set_key_idx)(struct ieee80211_hw *hw, int idx); /* Enable/disable IEEE 802.1X. This item requests wlan card to pass @@ -692,6 +734,14 @@ struct ieee80211_ops { void (*sta_table_notification)(struct ieee80211_hw *hw, int num_sta); + /* Handle ERP IE change notifications. Must be atomic. 
*/ + void (*erp_ie_changed)(struct ieee80211_hw *hw, u8 changes, + int cts_protection, int preamble); + + /* Flags for the erp_ie_changed changes parameter */ +#define IEEE80211_ERP_CHANGE_PROTECTION (1<<0) /* protection flag changed */ +#define IEEE80211_ERP_CHANGE_PREAMBLE (1<<1) /* barker preamble mode changed */ + /* Configure TX queue parameters (EDCF (aifs, cw_min, cw_max), * bursting) for a hardware TX queue. * queue = IEEE80211_TX_QUEUE_*. @@ -830,6 +880,7 @@ struct sk_buff *ieee80211_beacon_get(str /** * ieee80211_rts_get - RTS frame generation function * @hw: pointer obtained from ieee80211_alloc_hw(). + * @if_id: interface ID from &struct ieee80211_if_init_conf. * @frame: pointer to the frame that is going to be protected by the RTS. * @frame_len: the frame length (in octets). * @frame_txctl: &struct ieee80211_tx_control of the frame. @@ -840,7 +891,7 @@ struct sk_buff *ieee80211_beacon_get(str * the next RTS frame from the 802.11 code. The low-level is responsible * for calling this function before and RTS frame is needed. */ -void ieee80211_rts_get(struct ieee80211_hw *hw, +void ieee80211_rts_get(struct ieee80211_hw *hw, int if_id, const void *frame, size_t frame_len, const struct ieee80211_tx_control *frame_txctl, struct ieee80211_rts *rts); @@ -848,6 +899,7 @@ void ieee80211_rts_get(struct ieee80211_ /** * ieee80211_rts_duration - Get the duration field for an RTS frame * @hw: pointer obtained from ieee80211_alloc_hw(). + * @if_id: interface ID from &struct ieee80211_if_init_conf. * @frame_len: the length of the frame that is going to be protected by the RTS. * @frame_txctl: &struct ieee80211_tx_control of the frame. * @@ -855,13 +907,14 @@ void ieee80211_rts_get(struct ieee80211_ * the duration field, the low-level driver uses this function to receive * the duration field value in little-endian byteorder. 
*/ -__le16 ieee80211_rts_duration(struct ieee80211_hw *hw, +__le16 ieee80211_rts_duration(struct ieee80211_hw *hw, int if_id, size_t frame_len, const struct ieee80211_tx_control *frame_txctl); /** * ieee80211_ctstoself_get - CTS-to-self frame generation function * @hw: pointer obtained from ieee80211_alloc_hw(). + * @if_id: interface ID from &struct ieee80211_if_init_conf. * @frame: pointer to the frame that is going to be protected by the CTS-to-self. * @frame_len: the frame length (in octets). * @frame_txctl: &struct ieee80211_tx_control of the frame. @@ -872,7 +925,7 @@ __le16 ieee80211_rts_duration(struct iee * the next CTS-to-self frame from the 802.11 code. The low-level is responsible * for calling this function before and CTS-to-self frame is needed. */ -void ieee80211_ctstoself_get(struct ieee80211_hw *hw, +void ieee80211_ctstoself_get(struct ieee80211_hw *hw, int if_id, const void *frame, size_t frame_len, const struct ieee80211_tx_control *frame_txctl, struct ieee80211_cts *cts); @@ -880,6 +933,7 @@ void ieee80211_ctstoself_get(struct ieee /** * ieee80211_ctstoself_duration - Get the duration field for a CTS-to-self frame * @hw: pointer obtained from ieee80211_alloc_hw(). + * @if_id: interface ID from &struct ieee80211_if_init_conf. * @frame_len: the length of the frame that is going to be protected by the CTS-to-self. * @frame_txctl: &struct ieee80211_tx_control of the frame. * @@ -887,20 +941,21 @@ void ieee80211_ctstoself_get(struct ieee * the duration field, the low-level driver uses this function to receive * the duration field value in little-endian byteorder. */ -__le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, +__le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, int if_id, size_t frame_len, const struct ieee80211_tx_control *frame_txctl); /** * ieee80211_generic_frame_duration - Calculate the duration field for a frame * @hw: pointer obtained from ieee80211_alloc_hw(). 
+ * @if_id: interface ID from &struct ieee80211_if_init_conf. * @frame_len: the length of the frame. * @rate: the rate (in 100kbps) at which the frame is going to be transmitted. * * Calculate the duration field of some generic frame, given its * length and transmission rate (in 100kbps). */ -__le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, +__le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, int if_id, size_t frame_len, int rate); @@ -1002,12 +1057,6 @@ ieee80211_get_mc_list_item(struct ieee80 /* called by driver to notify scan status completed */ void ieee80211_scan_completed(struct ieee80211_hw *hw); -/* Function to indicate Radar Detection. The low level driver must call this - * function to indicate the presence of radar in the current channel. - * Additionally the radar type also could be sent */ -int ieee80211_radar_status(struct ieee80211_hw *hw, int channel, - int radar, int radar_type); - /* return a pointer to the source address (SA) */ static inline u8 *ieee80211_get_SA(struct ieee80211_hdr *hdr) { diff -puN include/net/rtnetlink.h~git-net include/net/rtnetlink.h --- a/include/net/rtnetlink.h~git-net +++ a/include/net/rtnetlink.h @@ -78,6 +78,10 @@ extern void __rtnl_link_unregister(struc extern int rtnl_link_register(struct rtnl_link_ops *ops); extern void rtnl_link_unregister(struct rtnl_link_ops *ops); +extern struct net_device *rtnl_create_link(char *ifname, + const struct rtnl_link_ops *ops, struct nlattr *tb[]); +extern const struct nla_policy ifla_policy[IFLA_MAX+1]; + #define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind) #endif diff -puN include/net/sctp/sctp.h~git-net include/net/sctp/sctp.h --- a/include/net/sctp/sctp.h~git-net +++ a/include/net/sctp/sctp.h @@ -468,6 +468,11 @@ static inline void sctp_skb_set_owner_r( skb->sk = sk; skb->destructor = sctp_sock_rfree; atomic_add(event->rmem_len, &sk->sk_rmem_alloc); + /* + * This mimics the behavior of + * sk_stream_set_owner_r + */ + 
sk->sk_forward_alloc -= event->rmem_len; } /* Tests if the list has one and only one entry. */ diff -puN include/net/sock.h~git-net include/net/sock.h --- a/include/net/sock.h~git-net +++ a/include/net/sock.h @@ -40,6 +40,7 @@ #ifndef _SOCK_H #define _SOCK_H +#include #include #include #include @@ -702,7 +703,7 @@ extern int sk_stream_mem_schedule(struct static inline int sk_stream_pages(int amt) { - return (amt + SK_STREAM_MEM_QUANTUM - 1) / SK_STREAM_MEM_QUANTUM; + return DIV_ROUND_UP(amt, SK_STREAM_MEM_QUANTUM); } static inline void sk_stream_mem_reclaim(struct sock *sk) diff -puN include/net/tcp.h~git-net include/net/tcp.h --- a/include/net/tcp.h~git-net +++ a/include/net/tcp.h @@ -39,6 +39,7 @@ #include #include #include +#include #include @@ -330,6 +331,17 @@ static inline void tcp_clear_options(str rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0; } +#define TCP_ECN_OK 1 +#define TCP_ECN_QUEUE_CWR 2 +#define TCP_ECN_DEMAND_CWR 4 + +static __inline__ void +TCP_ECN_create_request(struct request_sock *req, struct tcphdr *th) +{ + if (sysctl_tcp_ecn && th->ece && th->cwr) + inet_rsk(req)->ecn_ok = 1; +} + enum tcp_tw_status { TCP_TW_SUCCESS = 0, @@ -573,8 +585,6 @@ struct tcp_skb_cb { #define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0])) -#include - /* Due to TSO, an SKB can be composed of multiple actual * packets. To keep these tracked properly, we use this. 
*/ @@ -589,32 +599,19 @@ static inline int tcp_skb_mss(const stru return skb_shinfo(skb)->gso_size; } -static inline void tcp_dec_pcount_approx(__u32 *count, - const struct sk_buff *skb) +static inline void tcp_dec_pcount_approx_int(__u32 *count, const int decr) { if (*count) { - *count -= tcp_skb_pcount(skb); + *count -= decr; if ((int)*count < 0) *count = 0; } } -static inline void tcp_packets_out_inc(struct sock *sk, - const struct sk_buff *skb) -{ - struct tcp_sock *tp = tcp_sk(sk); - int orig = tp->packets_out; - - tp->packets_out += tcp_skb_pcount(skb); - if (!orig) - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - inet_csk(sk)->icsk_rto, TCP_RTO_MAX); -} - -static inline void tcp_packets_out_dec(struct tcp_sock *tp, - const struct sk_buff *skb) +static inline void tcp_dec_pcount_approx(__u32 *count, + const struct sk_buff *skb) { - tp->packets_out -= tcp_skb_pcount(skb); + tcp_dec_pcount_approx_int(count, tcp_skb_pcount(skb)); } /* Events passed to congestion control interface */ @@ -704,6 +701,39 @@ static inline void tcp_ca_event(struct s icsk->icsk_ca_ops->cwnd_event(sk, event); } +/* These functions determine how the current flow behaves in respect of SACK + * handling. SACK is negotiated with the peer, and therefore it can vary + * between different flows. + * + * tcp_is_sack - SACK enabled + * tcp_is_reno - No SACK + * tcp_is_fack - FACK enabled, implies SACK enabled + */ +static inline int tcp_is_sack(const struct tcp_sock *tp) +{ + return tp->rx_opt.sack_ok; +} + +static inline int tcp_is_reno(const struct tcp_sock *tp) +{ + return !tcp_is_sack(tp); +} + +static inline int tcp_is_fack(const struct tcp_sock *tp) +{ + return tp->rx_opt.sack_ok & 2; +} + +static inline void tcp_enable_fack(struct tcp_sock *tp) +{ + tp->rx_opt.sack_ok |= 2; +} + +static inline unsigned int tcp_left_out(const struct tcp_sock *tp) +{ + return tp->sacked_out + tp->lost_out; +} + /* This determines how many packets are "in the network" to the best * of our knowledge. 
In many cases it is conservative, but where * detailed information is available from the receiver (via SACK @@ -720,7 +750,7 @@ static inline void tcp_ca_event(struct s */ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) { - return (tp->packets_out - tp->left_out + tp->retrans_out); + return tp->packets_out - tcp_left_out(tp) + tp->retrans_out; } /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd. @@ -738,12 +768,8 @@ static inline __u32 tcp_current_ssthresh (tp->snd_cwnd >> 2))); } -static inline void tcp_sync_left_out(struct tcp_sock *tp) -{ - BUG_ON(tp->rx_opt.sack_ok && - (tp->sacked_out + tp->lost_out > tp->packets_out)); - tp->left_out = tp->sacked_out + tp->lost_out; -} +/* Use define here intentionally to get WARN_ON location shown at the caller */ +#define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out) extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); diff -puN include/net/tcp_ecn.h~git-net /dev/null --- a/include/net/tcp_ecn.h +++ /dev/null @@ -1,130 +0,0 @@ -#ifndef _NET_TCP_ECN_H_ -#define _NET_TCP_ECN_H_ 1 - -#include -#include - -#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) - -#define TCP_ECN_OK 1 -#define TCP_ECN_QUEUE_CWR 2 -#define TCP_ECN_DEMAND_CWR 4 - -static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp) -{ - if (tp->ecn_flags&TCP_ECN_OK) - tp->ecn_flags |= TCP_ECN_QUEUE_CWR; -} - - -/* Output functions */ - -static inline void TCP_ECN_send_synack(struct tcp_sock *tp, - struct sk_buff *skb) -{ - TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR; - if (!(tp->ecn_flags&TCP_ECN_OK)) - TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE; -} - -static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) -{ - struct tcp_sock *tp = tcp_sk(sk); - - tp->ecn_flags = 0; - if (sysctl_tcp_ecn) { - TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE|TCPCB_FLAG_CWR; - tp->ecn_flags = TCP_ECN_OK; - } -} - 
-static __inline__ void -TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th) -{ - if (inet_rsk(req)->ecn_ok) - th->ece = 1; -} - -static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, - int tcp_header_len) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (tp->ecn_flags & TCP_ECN_OK) { - /* Not-retransmitted data segment: set ECT and inject CWR. */ - if (skb->len != tcp_header_len && - !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) { - INET_ECN_xmit(sk); - if (tp->ecn_flags&TCP_ECN_QUEUE_CWR) { - tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; - tcp_hdr(skb)->cwr = 1; - skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; - } - } else { - /* ACK or retransmitted segment: clear ECT|CE */ - INET_ECN_dontxmit(sk); - } - if (tp->ecn_flags & TCP_ECN_DEMAND_CWR) - tcp_hdr(skb)->ece = 1; - } -} - -/* Input functions */ - -static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb) -{ - if (tcp_hdr(skb)->cwr) - tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; -} - -static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp) -{ - tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; -} - -static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb) -{ - if (tp->ecn_flags&TCP_ECN_OK) { - if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags)) - tp->ecn_flags |= TCP_ECN_DEMAND_CWR; - /* Funny extension: if ECT is not set on a segment, - * it is surely retransmit. It is not in ECN RFC, - * but Linux follows this rule. 
*/ - else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) - tcp_enter_quickack_mode((struct sock *)tp); - } -} - -static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th) -{ - if ((tp->ecn_flags&TCP_ECN_OK) && (!th->ece || th->cwr)) - tp->ecn_flags &= ~TCP_ECN_OK; -} - -static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th) -{ - if ((tp->ecn_flags&TCP_ECN_OK) && (!th->ece || !th->cwr)) - tp->ecn_flags &= ~TCP_ECN_OK; -} - -static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th) -{ - if (th->ece && !th->syn && (tp->ecn_flags&TCP_ECN_OK)) - return 1; - return 0; -} - -static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, - struct request_sock *req) -{ - tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0; -} - -static __inline__ void -TCP_ECN_create_request(struct request_sock *req, struct tcphdr *th) -{ - if (sysctl_tcp_ecn && th->ece && th->cwr) - inet_rsk(req)->ecn_ok = 1; -} - -#endif diff -puN /dev/null include/net/veth.h --- /dev/null +++ a/include/net/veth.h @@ -0,0 +1,12 @@ +#ifndef __NET_VETH_H_ +#define __NET_VETH_H_ + +enum { + VETH_INFO_UNSPEC, + VETH_INFO_PEER, + + __VETH_INFO_MAX +#define VETH_INFO_MAX (__VETH_INFO_MAX - 1) +}; + +#endif diff -puN kernel/hrtimer.c~git-net kernel/hrtimer.c --- a/kernel/hrtimer.c~git-net +++ a/kernel/hrtimer.c @@ -301,6 +301,30 @@ ktime_t ktime_sub_ns(const ktime_t kt, u } EXPORT_SYMBOL_GPL(ktime_add_ns); + +/** + * ktime_sub_ns - Subtract a scalar nanoseconds value from a ktime_t variable + * @kt: minuend + * @nsec: the scalar nsec value to subtract + * + * Returns the subtraction of @nsec from @kt in ktime_t format + */ +ktime_t ktime_sub_ns(const ktime_t kt, u64 nsec) +{ + ktime_t tmp; + + if (likely(nsec < NSEC_PER_SEC)) { + tmp.tv64 = nsec; + } else { + unsigned long rem = do_div(nsec, NSEC_PER_SEC); + + tmp = ktime_set((long)nsec, rem); + } + + return ktime_sub(kt, tmp); +} + +EXPORT_SYMBOL_GPL(ktime_sub_ns); # endif /* 
!CONFIG_KTIME_SCALAR */ /* diff -puN kernel/softirq.c~git-net kernel/softirq.c --- a/kernel/softirq.c~git-net +++ a/kernel/softirq.c @@ -271,8 +271,6 @@ asmlinkage void do_softirq(void) local_irq_restore(flags); } -EXPORT_SYMBOL(do_softirq); - #endif /* diff -puN net/atm/br2684.c~git-net net/atm/br2684.c --- a/net/atm/br2684.c~git-net +++ a/net/atm/br2684.c @@ -34,12 +34,6 @@ Author: Marcell GAL, 2000, XDSL Ltd, Hun */ /* #define FASTER_VERSION */ -#ifdef DEBUG -#define DPRINTK(format, args...) printk(KERN_DEBUG "br2684: " format, ##args) -#else -#define DPRINTK(format, args...) -#endif - #ifdef SKB_DEBUG static void skb_debug(const struct sk_buff *skb) { @@ -180,7 +174,7 @@ static int br2684_xmit_vcc(struct sk_buf skb_debug(skb); ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc; - DPRINTK("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev); + pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev); if (!atm_may_send(atmvcc, skb->truesize)) { /* we free this here for now, because we cannot know in a higher layer whether the skb point it supplied wasn't freed yet. 
@@ -209,11 +203,11 @@ static int br2684_start_xmit(struct sk_b struct br2684_dev *brdev = BRPRIV(dev); struct br2684_vcc *brvcc; - DPRINTK("br2684_start_xmit, skb->dst=%p\n", skb->dst); + pr_debug("br2684_start_xmit, skb->dst=%p\n", skb->dst); read_lock(&devs_lock); brvcc = pick_outgoing_vcc(skb, brdev); if (brvcc == NULL) { - DPRINTK("no vcc attached to dev %s\n", dev->name); + pr_debug("no vcc attached to dev %s\n", dev->name); brdev->stats.tx_errors++; brdev->stats.tx_carrier_errors++; /* netif_stop_queue(dev); */ @@ -239,7 +233,7 @@ static int br2684_start_xmit(struct sk_b static struct net_device_stats *br2684_get_stats(struct net_device *dev) { - DPRINTK("br2684_get_stats\n"); + pr_debug("br2684_get_stats\n"); return &BRPRIV(dev)->stats; } @@ -390,7 +384,7 @@ packet_fails_filter(__be16 type, struct static void br2684_close_vcc(struct br2684_vcc *brvcc) { - DPRINTK("removing VCC %p from dev %p\n", brvcc, brvcc->device); + pr_debug("removing VCC %p from dev %p\n", brvcc, brvcc->device); write_lock_irq(&devs_lock); list_del(&brvcc->brvccs); write_unlock_irq(&devs_lock); @@ -408,7 +402,7 @@ static void br2684_push(struct atm_vcc * struct br2684_dev *brdev = BRPRIV(net_dev); int plen = sizeof(llc_oui_pid_pad) + ETH_HLEN; - DPRINTK("br2684_push\n"); + pr_debug("br2684_push\n"); if (unlikely(skb == NULL)) { /* skb==NULL means VCC is being destroyed */ @@ -425,7 +419,7 @@ static void br2684_push(struct atm_vcc * skb_debug(skb); atm_return(atmvcc, skb->truesize); - DPRINTK("skb from brdev %p\n", brdev); + pr_debug("skb from brdev %p\n", brdev); if (brvcc->encaps == e_llc) { /* let us waste some time for checking the encapsulation. Note, that only 7 char is checked so frames with a valid FCS @@ -474,7 +468,7 @@ static void br2684_push(struct atm_vcc * #endif /* CONFIG_ATM_BR2684_IPFILTER */ skb->dev = net_dev; ATM_SKB(skb)->vcc = atmvcc; /* needed ? 
*/ - DPRINTK("received packet's protocol: %x\n", ntohs(skb->protocol)); + pr_debug("received packet's protocol: %x\n", ntohs(skb->protocol)); skb_debug(skb); if (unlikely(!(net_dev->flags & IFF_UP))) { /* sigh, interface is down */ @@ -532,7 +526,7 @@ Note: we do not have explicit unassign, err = -EINVAL; goto error; } - DPRINTK("br2684_regvcc vcc=%p, encaps=%d, brvcc=%p\n", atmvcc, be.encaps, + pr_debug("br2684_regvcc vcc=%p, encaps=%d, brvcc=%p\n", atmvcc, be.encaps, brvcc); if (list_empty(&brdev->brvccs) && !brdev->mac_was_set) { unsigned char *esi = atmvcc->dev->esi; @@ -612,7 +606,7 @@ static int br2684_create(void __user *ar struct br2684_dev *brdev; struct atm_newif_br2684 ni; - DPRINTK("br2684_create\n"); + pr_debug("br2684_create\n"); if (copy_from_user(&ni, arg, sizeof ni)) { return -EFAULT; @@ -629,7 +623,7 @@ static int br2684_create(void __user *ar brdev = BRPRIV(netdev); - DPRINTK("registered netdev %s\n", netdev->name); + pr_debug("registered netdev %s\n", netdev->name); /* open, stop, do_ioctl ? */ err = register_netdev(netdev); if (err < 0) { diff -puN net/atm/clip.c~git-net net/atm/clip.c --- a/net/atm/clip.c~git-net +++ a/net/atm/clip.c @@ -40,14 +40,6 @@ #include "resources.h" #include - -#if 0 -#define DPRINTK(format,args...) printk(format,##args) -#else -#define DPRINTK(format,args...) 
-#endif - - static struct net_device *clip_devs; static struct atm_vcc *atmarpd; static struct neigh_table clip_tbl; @@ -59,7 +51,7 @@ static int to_atmarpd(enum atmarp_ctrl_t struct atmarp_ctrl *ctrl; struct sk_buff *skb; - DPRINTK("to_atmarpd(%d)\n", type); + pr_debug("to_atmarpd(%d)\n", type); if (!atmarpd) return -EUNATCH; skb = alloc_skb(sizeof(struct atmarp_ctrl),GFP_ATOMIC); @@ -79,7 +71,7 @@ static int to_atmarpd(enum atmarp_ctrl_t static void link_vcc(struct clip_vcc *clip_vcc, struct atmarp_entry *entry) { - DPRINTK("link_vcc %p to entry %p (neigh %p)\n", clip_vcc, entry, + pr_debug("link_vcc %p to entry %p (neigh %p)\n", clip_vcc, entry, entry->neigh); clip_vcc->entry = entry; clip_vcc->xoff = 0; /* @@@ may overrun buffer by one packet */ @@ -134,7 +126,7 @@ static int neigh_check_cb(struct neighbo unsigned long exp = cv->last_use + cv->idle_timeout; if (cv->idle_timeout && time_after(jiffies, exp)) { - DPRINTK("releasing vcc %p->%p of entry %p\n", + pr_debug("releasing vcc %p->%p of entry %p\n", cv, cv->vcc, entry); vcc_release_async(cv->vcc, -ETIMEDOUT); } @@ -146,7 +138,7 @@ static int neigh_check_cb(struct neighbo if (atomic_read(&n->refcnt) > 1) { struct sk_buff *skb; - DPRINTK("destruction postponed with ref %d\n", + pr_debug("destruction postponed with ref %d\n", atomic_read(&n->refcnt)); while ((skb = skb_dequeue(&n->arp_queue)) != NULL) @@ -155,7 +147,7 @@ static int neigh_check_cb(struct neighbo return 0; } - DPRINTK("expired neigh %p\n", n); + pr_debug("expired neigh %p\n", n); return 1; } @@ -171,14 +163,14 @@ static int clip_arp_rcv(struct sk_buff * { struct atm_vcc *vcc; - DPRINTK("clip_arp_rcv\n"); + pr_debug("clip_arp_rcv\n"); vcc = ATM_SKB(skb)->vcc; if (!vcc || !atm_charge(vcc, skb->truesize)) { dev_kfree_skb_any(skb); return 0; } - DPRINTK("pushing to %p\n", vcc); - DPRINTK("using %p\n", CLIP_VCC(vcc)->old_push); + pr_debug("pushing to %p\n", vcc); + pr_debug("using %p\n", CLIP_VCC(vcc)->old_push); CLIP_VCC(vcc)->old_push(vcc, skb); 
return 0; } @@ -196,9 +188,9 @@ static void clip_push(struct atm_vcc *vc { struct clip_vcc *clip_vcc = CLIP_VCC(vcc); - DPRINTK("clip push\n"); + pr_debug("clip push\n"); if (!skb) { - DPRINTK("removing VCC %p\n", clip_vcc); + pr_debug("removing VCC %p\n", clip_vcc); if (clip_vcc->entry) unlink_clip_vcc(clip_vcc); clip_vcc->old_push(vcc, NULL); /* pass on the bad news */ @@ -247,7 +239,7 @@ static void clip_pop(struct atm_vcc *vcc int old; unsigned long flags; - DPRINTK("clip_pop(vcc %p)\n", vcc); + pr_debug("clip_pop(vcc %p)\n", vcc); clip_vcc->old_pop(vcc, skb); /* skb->dev == NULL in outbound ARP packets */ if (!dev) @@ -263,7 +255,7 @@ static void clip_pop(struct atm_vcc *vcc static void clip_neigh_solicit(struct neighbour *neigh, struct sk_buff *skb) { - DPRINTK("clip_neigh_solicit (neigh %p, skb %p)\n", neigh, skb); + pr_debug("clip_neigh_solicit (neigh %p, skb %p)\n", neigh, skb); to_atmarpd(act_need, PRIV(neigh->dev)->number, NEIGH2ENTRY(neigh)->ip); } @@ -292,7 +284,7 @@ static int clip_constructor(struct neigh struct in_device *in_dev; struct neigh_parms *parms; - DPRINTK("clip_constructor (neigh %p, entry %p)\n", neigh, entry); + pr_debug("clip_constructor (neigh %p, entry %p)\n", neigh, entry); neigh->type = inet_addr_type(entry->ip); if (neigh->type != RTN_UNICAST) return -EINVAL; @@ -376,7 +368,7 @@ static int clip_start_xmit(struct sk_buf int old; unsigned long flags; - DPRINTK("clip_start_xmit (skb %p)\n", skb); + pr_debug("clip_start_xmit (skb %p)\n", skb); if (!skb->dst) { printk(KERN_ERR "clip_start_xmit: skb->dst == NULL\n"); dev_kfree_skb(skb); @@ -412,9 +404,9 @@ static int clip_start_xmit(struct sk_buf } return 0; } - DPRINTK("neigh %p, vccs %p\n", entry, entry->vccs); + pr_debug("neigh %p, vccs %p\n", entry, entry->vccs); ATM_SKB(skb)->vcc = vcc = entry->vccs->vcc; - DPRINTK("using neighbour %p, vcc %p\n", skb->dst->neighbour, vcc); + pr_debug("using neighbour %p, vcc %p\n", skb->dst->neighbour, vcc); if (entry->vccs->encap) { void *here; @@ 
-425,7 +417,7 @@ static int clip_start_xmit(struct sk_buf atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); ATM_SKB(skb)->atm_options = vcc->atm_options; entry->vccs->last_use = jiffies; - DPRINTK("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, vcc, vcc->dev); + pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, vcc, vcc->dev); old = xchg(&entry->vccs->xoff, 1); /* assume XOFF ... */ if (old) { printk(KERN_WARNING "clip_start_xmit: XOFF->XOFF transition\n"); @@ -468,7 +460,7 @@ static int clip_mkip(struct atm_vcc *vcc clip_vcc = kmalloc(sizeof(struct clip_vcc), GFP_KERNEL); if (!clip_vcc) return -ENOMEM; - DPRINTK("mkip clip_vcc %p vcc %p\n", clip_vcc, vcc); + pr_debug("mkip clip_vcc %p vcc %p\n", clip_vcc, vcc); clip_vcc->vcc = vcc; vcc->user_back = clip_vcc; set_bit(ATM_VF_IS_CLIP, &vcc->flags); @@ -538,7 +530,7 @@ static int clip_setentry(struct atm_vcc printk(KERN_ERR "hiding hidden ATMARP entry\n"); return 0; } - DPRINTK("setentry: remove\n"); + pr_debug("setentry: remove\n"); unlink_clip_vcc(clip_vcc); return 0; } @@ -552,9 +544,9 @@ static int clip_setentry(struct atm_vcc entry = NEIGH2ENTRY(neigh); if (entry != clip_vcc->entry) { if (!clip_vcc->entry) - DPRINTK("setentry: add\n"); + pr_debug("setentry: add\n"); else { - DPRINTK("setentry: update\n"); + pr_debug("setentry: update\n"); unlink_clip_vcc(clip_vcc); } link_vcc(clip_vcc, entry); @@ -611,7 +603,7 @@ static int clip_create(int number) } clip_priv->next = clip_devs; clip_devs = dev; - DPRINTK("registered (net:%s)\n", dev->name); + pr_debug("registered (net:%s)\n", dev->name); return number; } @@ -631,16 +623,16 @@ static int clip_device_event(struct noti switch (event) { case NETDEV_UP: - DPRINTK("clip_device_event NETDEV_UP\n"); + pr_debug("clip_device_event NETDEV_UP\n"); to_atmarpd(act_up, PRIV(dev)->number, 0); break; case NETDEV_GOING_DOWN: - DPRINTK("clip_device_event NETDEV_DOWN\n"); + pr_debug("clip_device_event NETDEV_DOWN\n"); to_atmarpd(act_down, PRIV(dev)->number, 0); break; case 
NETDEV_CHANGE: case NETDEV_CHANGEMTU: - DPRINTK("clip_device_event NETDEV_CHANGE*\n"); + pr_debug("clip_device_event NETDEV_CHANGE*\n"); to_atmarpd(act_change, PRIV(dev)->number, 0); break; } @@ -681,14 +673,14 @@ static struct notifier_block clip_inet_n static void atmarpd_close(struct atm_vcc *vcc) { - DPRINTK("atmarpd_close\n"); + pr_debug("atmarpd_close\n"); rtnl_lock(); atmarpd = NULL; skb_queue_purge(&sk_atm(vcc)->sk_receive_queue); rtnl_unlock(); - DPRINTK("(done)\n"); + pr_debug("(done)\n"); module_put(THIS_MODULE); } diff -puN net/atm/common.c~git-net net/atm/common.c --- a/net/atm/common.c~git-net +++ a/net/atm/common.c @@ -30,13 +30,6 @@ #include "addr.h" /* address registry */ #include "signaling.h" /* for WAITING and sigd_attach */ - -#if 0 -#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) -#else -#define DPRINTK(format,args...) -#endif - struct hlist_head vcc_hash[VCC_HTABLE_SIZE]; DEFINE_RWLOCK(vcc_sklist_lock); @@ -70,13 +63,13 @@ static struct sk_buff *alloc_tx(struct a struct sock *sk = sk_atm(vcc); if (atomic_read(&sk->sk_wmem_alloc) && !atm_may_send(vcc, size)) { - DPRINTK("Sorry: wmem_alloc = %d, size = %d, sndbuf = %d\n", + pr_debug("Sorry: wmem_alloc = %d, size = %d, sndbuf = %d\n", atomic_read(&sk->sk_wmem_alloc), size, sk->sk_sndbuf); return NULL; } while (!(skb = alloc_skb(size,GFP_KERNEL))) schedule(); - DPRINTK("AlTx %d += %d\n", atomic_read(&sk->sk_wmem_alloc), + pr_debug("AlTx %d += %d\n", atomic_read(&sk->sk_wmem_alloc), skb->truesize); atomic_add(skb->truesize, &sk->sk_wmem_alloc); return skb; @@ -392,10 +385,10 @@ static int __vcc_connect(struct atm_vcc if (!error) error = adjust_tp(&vcc->qos.rxtp,vcc->qos.aal); if (error) goto fail; - DPRINTK("VCC %d.%d, AAL %d\n",vpi,vci,vcc->qos.aal); - DPRINTK(" TX: %d, PCR %d..%d, SDU %d\n",vcc->qos.txtp.traffic_class, + pr_debug("VCC %d.%d, AAL %d\n",vpi,vci,vcc->qos.aal); + pr_debug(" TX: %d, PCR %d..%d, SDU %d\n",vcc->qos.txtp.traffic_class, 
vcc->qos.txtp.min_pcr,vcc->qos.txtp.max_pcr,vcc->qos.txtp.max_sdu); - DPRINTK(" RX: %d, PCR %d..%d, SDU %d\n",vcc->qos.rxtp.traffic_class, + pr_debug(" RX: %d, PCR %d..%d, SDU %d\n",vcc->qos.rxtp.traffic_class, vcc->qos.rxtp.min_pcr,vcc->qos.rxtp.max_pcr,vcc->qos.rxtp.max_sdu); if (dev->ops->open) { @@ -420,7 +413,7 @@ int vcc_connect(struct socket *sock, int struct atm_vcc *vcc = ATM_SD(sock); int error; - DPRINTK("vcc_connect (vpi %d, vci %d)\n",vpi,vci); + pr_debug("vcc_connect (vpi %d, vci %d)\n",vpi,vci); if (sock->state == SS_CONNECTED) return -EISCONN; if (sock->state != SS_UNCONNECTED) @@ -433,7 +426,7 @@ int vcc_connect(struct socket *sock, int else if (test_bit(ATM_VF_PARTIAL,&vcc->flags)) return -EINVAL; - DPRINTK("vcc_connect (TX: cl %d,bw %d-%d,sdu %d; " + pr_debug("vcc_connect (TX: cl %d,bw %d-%d,sdu %d; " "RX: cl %d,bw %d-%d,sdu %d,AAL %s%d)\n", vcc->qos.txtp.traffic_class,vcc->qos.txtp.min_pcr, vcc->qos.txtp.max_pcr,vcc->qos.txtp.max_sdu, @@ -504,7 +497,7 @@ int vcc_recvmsg(struct kiocb *iocb, stru if (error) return error; sock_recv_timestamp(msg, sk, skb); - DPRINTK("RcvM %d -= %d\n", atomic_read(&sk->rmem_alloc), skb->truesize); + pr_debug("RcvM %d -= %d\n", atomic_read(&sk->sk_rmem_alloc), skb->truesize); atm_return(vcc, skb->truesize); skb_free_datagram(sk, skb); return copied; diff -puN net/atm/lec.c~git-net net/atm/lec.c --- a/net/atm/lec.c~git-net +++ a/net/atm/lec.c @@ -48,12 +48,6 @@ static unsigned char bridge_ula_lec[] = #include "lec_arpc.h" #include "resources.h" -#if 0 -#define DPRINTK printk -#else -#define DPRINTK(format,args...) 
-#endif - #define DUMP_PACKETS 0 /* * 0 = None, * 1 = 30 first bytes @@ -273,7 +267,7 @@ static int lec_start_xmit(struct sk_buff int i = 0; #endif /* DUMP_PACKETS >0 */ - DPRINTK("lec_start_xmit called\n"); + pr_debug("lec_start_xmit called\n"); if (!priv->lecd) { printk("%s:No lecd attached\n", dev->name); priv->stats.tx_errors++; @@ -281,7 +275,7 @@ static int lec_start_xmit(struct sk_buff return -EUNATCH; } - DPRINTK("skbuff head:%lx data:%lx tail:%lx end:%lx\n", + pr_debug("skbuff head:%lx data:%lx tail:%lx end:%lx\n", (long)skb->head, (long)skb->data, (long)skb_tail_pointer(skb), (long)skb_end_pointer(skb)); #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) @@ -292,7 +286,7 @@ static int lec_start_xmit(struct sk_buff /* Make sure we have room for lec_id */ if (skb_headroom(skb) < 2) { - DPRINTK("lec_start_xmit: reallocating skb\n"); + pr_debug("lec_start_xmit: reallocating skb\n"); skb2 = skb_realloc_headroom(skb, LEC_HEADER_LEN); kfree_skb(skb); if (skb2 == NULL) @@ -373,22 +367,22 @@ static int lec_start_xmit(struct sk_buff #endif entry = NULL; vcc = lec_arp_resolve(priv, dst, is_rdesc, &entry); - DPRINTK("%s:vcc:%p vcc_flags:%x, entry:%p\n", dev->name, + pr_debug("%s:vcc:%p vcc_flags:%x, entry:%p\n", dev->name, vcc, vcc ? 
vcc->flags : 0, entry); if (!vcc || !test_bit(ATM_VF_READY, &vcc->flags)) { if (entry && (entry->tx_wait.qlen < LEC_UNRES_QUE_LEN)) { - DPRINTK("%s:lec_start_xmit: queuing packet, ", + pr_debug("%s:lec_start_xmit: queuing packet, ", dev->name); - DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n", + pr_debug("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n", lec_h->h_dest[0], lec_h->h_dest[1], lec_h->h_dest[2], lec_h->h_dest[3], lec_h->h_dest[4], lec_h->h_dest[5]); skb_queue_tail(&entry->tx_wait, skb); } else { - DPRINTK + pr_debug ("%s:lec_start_xmit: tx queue full or no arp entry, dropping, ", dev->name); - DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n", + pr_debug("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n", lec_h->h_dest[0], lec_h->h_dest[1], lec_h->h_dest[2], lec_h->h_dest[3], lec_h->h_dest[4], lec_h->h_dest[5]); @@ -402,8 +396,8 @@ static int lec_start_xmit(struct sk_buff #endif /* DUMP_PACKETS > 0 */ while (entry && (skb2 = skb_dequeue(&entry->tx_wait))) { - DPRINTK("lec.c: emptying tx queue, "); - DPRINTK("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n", + pr_debug("lec.c: emptying tx queue, "); + pr_debug("MAC address 0x%02x:%02x:%02x:%02x:%02x:%02x\n", lec_h->h_dest[0], lec_h->h_dest[1], lec_h->h_dest[2], lec_h->h_dest[3], lec_h->h_dest[4], lec_h->h_dest[5]); lec_send(vcc, skb2, priv); @@ -464,7 +458,7 @@ static int lec_atm_send(struct atm_vcc * mesg = (struct atmlec_msg *)skb->data; tmp = skb->data; tmp += sizeof(struct atmlec_msg); - DPRINTK("%s: msg from zeppelin:%d\n", dev->name, mesg->type); + pr_debug("%s: msg from zeppelin:%d\n", dev->name, mesg->type); switch (mesg->type) { case l_set_mac_addr: for (i = 0; i < 6; i++) { @@ -500,9 +494,9 @@ static int lec_atm_send(struct atm_vcc * mesg->content.normal.atm_addr, mesg->content.normal.flag, mesg->content.normal.targetless_le_arp); - DPRINTK("lec: in l_arp_update\n"); + pr_debug("lec: in l_arp_update\n"); if (mesg->sizeoftlvs != 0) { /* LANE2 3.1.5 */ - DPRINTK("lec: LANE2 3.1.5, got 
tlvs, size %d\n", + pr_debug("lec: LANE2 3.1.5, got tlvs, size %d\n", mesg->sizeoftlvs); lane2_associate_ind(dev, mesg->content.normal.mac_addr, tmp, mesg->sizeoftlvs); @@ -544,7 +538,7 @@ static int lec_atm_send(struct atm_vcc * { struct net_bridge_fdb_entry *f; - DPRINTK + pr_debug ("%s: bridge zeppelin asks about 0x%02x:%02x:%02x:%02x:%02x:%02x\n", dev->name, mesg->content.proxy.mac_addr[0], mesg->content.proxy.mac_addr[1], @@ -564,7 +558,7 @@ static int lec_atm_send(struct atm_vcc * struct sk_buff *skb2; struct sock *sk; - DPRINTK + pr_debug ("%s: entry found, responding to zeppelin\n", dev->name); skb2 = @@ -670,7 +664,7 @@ send_to_lecd(struct lec_priv *priv, atml sk->sk_data_ready(sk, skb->len); if (data != NULL) { - DPRINTK("lec: about to send %d bytes of data\n", data->len); + pr_debug("lec: about to send %d bytes of data\n", data->len); atm_force_charge(priv->lecd, data->truesize); skb_queue_tail(&sk->sk_receive_queue, data); sk->sk_data_ready(sk, skb->len); @@ -742,7 +736,7 @@ static void lec_push(struct atm_vcc *vcc vcc->vpi, vcc->vci); #endif if (!skb) { - DPRINTK("%s: null skb\n", dev->name); + pr_debug("%s: null skb\n", dev->name); lec_vcc_close(priv, vcc); return; } @@ -766,7 +760,7 @@ static void lec_push(struct atm_vcc *vcc if (memcmp(skb->data, lec_ctrl_magic, 4) == 0) { /* Control frame, to daemon */ struct sock *sk = sk_atm(vcc); - DPRINTK("%s: To daemon\n", dev->name); + pr_debug("%s: To daemon\n", dev->name); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk, skb->len); } else { /* Data frame, queue to protocol handlers */ @@ -780,7 +774,7 @@ static void lec_push(struct atm_vcc *vcc * Probably looping back, or if lecd is missing, * lecd has gone down */ - DPRINTK("Ignoring frame...\n"); + pr_debug("Ignoring frame...\n"); dev_kfree_skb(skb); return; } @@ -1442,9 +1436,9 @@ static void lane2_associate_ind(struct n #include #if 0 -#define DPRINTK(format,args...) +#define pr_debug(format,args...) 
/* -#define DPRINTK printk +#define pr_debug printk */ #endif #define DEBUG_ARP_TABLE 0 @@ -1513,7 +1507,7 @@ lec_arp_add(struct lec_priv *priv, struc tmp = &priv->lec_arp_tables[HASH(entry->mac_addr[ETH_ALEN - 1])]; hlist_add_head(&entry->next, tmp); - DPRINTK("LEC_ARP: Added entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", + pr_debug("LEC_ARP: Added entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", 0xff & entry->mac_addr[0], 0xff & entry->mac_addr[1], 0xff & entry->mac_addr[2], 0xff & entry->mac_addr[3], 0xff & entry->mac_addr[4], 0xff & entry->mac_addr[5]); @@ -1555,7 +1549,7 @@ lec_arp_remove(struct lec_priv *priv, st } skb_queue_purge(&to_remove->tx_wait); /* FIXME: good place for this? */ - DPRINTK("LEC_ARP: Removed entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", + pr_debug("LEC_ARP: Removed entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", 0xff & to_remove->mac_addr[0], 0xff & to_remove->mac_addr[1], 0xff & to_remove->mac_addr[2], 0xff & to_remove->mac_addr[3], 0xff & to_remove->mac_addr[4], 0xff & to_remove->mac_addr[5]); @@ -1777,7 +1771,7 @@ static struct lec_arp_table *lec_arp_fin struct hlist_head *head; struct lec_arp_table *entry; - DPRINTK("LEC_ARP: lec_arp_find :%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", + pr_debug("LEC_ARP: lec_arp_find :%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", mac_addr[0] & 0xff, mac_addr[1] & 0xff, mac_addr[2] & 0xff, mac_addr[3] & 0xff, mac_addr[4] & 0xff, mac_addr[5] & 0xff); @@ -1819,7 +1813,7 @@ static void lec_arp_expire_arp(unsigned entry = (struct lec_arp_table *)data; - DPRINTK("lec_arp_expire_arp\n"); + pr_debug("lec_arp_expire_arp\n"); if (entry->status == ESI_ARP_PENDING) { if (entry->no_tries <= entry->priv->max_retry_count) { if (entry->is_rdesc) @@ -1843,7 +1837,7 @@ static void lec_arp_expire_vcc(unsigned del_timer(&to_remove->timer); - DPRINTK("LEC_ARP %p %p: lec_arp_expire_vcc vpi:%d vci:%d\n", + pr_debug("LEC_ARP %p %p: lec_arp_expire_vcc vpi:%d vci:%d\n", to_remove, priv, to_remove->vcc ? 
to_remove->recv_vcc->vpi : 0, to_remove->vcc ? to_remove->recv_vcc->vci : 0); @@ -1883,7 +1877,7 @@ static void lec_arp_check_expire(struct unsigned long time_to_check; int i; - DPRINTK("lec_arp_check_expire %p\n", priv); + pr_debug("lec_arp_check_expire %p\n", priv); now = jiffies; restart: spin_lock_irqsave(&priv->lec_arp_lock, flags); @@ -1895,13 +1889,13 @@ restart: else time_to_check = priv->aging_time; - DPRINTK("About to expire: %lx - %lx > %lx\n", + pr_debug("About to expire: %lx - %lx > %lx\n", now, entry->last_used, time_to_check); if (time_after(now, entry->last_used + time_to_check) && !(entry->flags & LEC_PERMANENT_FLAG) && !(entry->mac_addr[0] & 0x01)) { /* LANE2: 7.1.20 */ /* Remove entry */ - DPRINTK("LEC:Entry timed out\n"); + pr_debug("LEC:Entry timed out\n"); lec_arp_remove(priv, entry); lec_arp_put(entry); } else { @@ -1999,7 +1993,7 @@ static struct atm_vcc *lec_arp_resolve(s entry->packets_flooded < priv->maximum_unknown_frame_count) { entry->packets_flooded++; - DPRINTK("LEC_ARP: Flooding..\n"); + pr_debug("LEC_ARP: Flooding..\n"); found = priv->mcast_vcc; goto out; } @@ -2010,13 +2004,13 @@ static struct atm_vcc *lec_arp_resolve(s */ lec_arp_hold(entry); *ret_entry = entry; - DPRINTK("lec: entry->status %d entry->vcc %p\n", entry->status, + pr_debug("lec: entry->status %d entry->vcc %p\n", entry->status, entry->vcc); found = NULL; } else { /* No matching entry was found */ entry = make_entry(priv, mac_to_find); - DPRINTK("LEC_ARP: Making entry\n"); + pr_debug("LEC_ARP: Making entry\n"); if (!entry) { found = priv->mcast_vcc; goto out; @@ -2053,7 +2047,7 @@ lec_addr_delete(struct lec_priv *priv, u struct lec_arp_table *entry; int i; - DPRINTK("lec_addr_delete\n"); + pr_debug("lec_addr_delete\n"); spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { hlist_for_each_entry_safe(entry, node, next, &priv->lec_arp_tables[i], next) { @@ -2084,8 +2078,8 @@ lec_arp_update(struct lec_priv *priv, un struct 
lec_arp_table *entry, *tmp; int i; - DPRINTK("lec:%s", (targetless_le_arp) ? "targetless " : " "); - DPRINTK("lec_arp_update mac:%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", + pr_debug("lec:%s", (targetless_le_arp) ? "targetless " : " "); + pr_debug("lec_arp_update mac:%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", mac_addr[0], mac_addr[1], mac_addr[2], mac_addr[3], mac_addr[4], mac_addr[5]); @@ -2122,7 +2116,7 @@ lec_arp_update(struct lec_priv *priv, un entry->flags |= LEC_REMOTE_FLAG; else entry->flags &= ~LEC_REMOTE_FLAG; - DPRINTK("After update\n"); + pr_debug("After update\n"); dump_arp_table(priv); goto out; } @@ -2166,7 +2160,7 @@ lec_arp_update(struct lec_priv *priv, un entry->status = ESI_VC_PENDING; send_to_lecd(priv, l_svc_setup, entry->mac_addr, atm_addr, NULL); } - DPRINTK("After update2\n"); + pr_debug("After update2\n"); dump_arp_table(priv); out: spin_unlock_irqrestore(&priv->lec_arp_lock, flags); @@ -2189,7 +2183,7 @@ lec_vcc_added(struct lec_priv *priv, str if (ioc_data->receive == 2) { /* Vcc for Multicast Forward. No timer, LANEv2 7.1.20 and 2.3.5.3 */ - DPRINTK("LEC_ARP: Attaching mcast forward\n"); + pr_debug("LEC_ARP: Attaching mcast forward\n"); #if 0 entry = lec_arp_find(priv, bus_mac); if (!entry) { @@ -2214,7 +2208,7 @@ lec_vcc_added(struct lec_priv *priv, str * Vcc which we don't want to make default vcc, * attach it anyway. 
*/ - DPRINTK + pr_debug ("LEC_ARP:Attaching data direct, not default: " "%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", ioc_data->atm_addr[0], ioc_data->atm_addr[1], @@ -2242,7 +2236,7 @@ lec_vcc_added(struct lec_priv *priv, str dump_arp_table(priv); goto out; } - DPRINTK + pr_debug ("LEC_ARP:Attaching data direct, default: " "%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", ioc_data->atm_addr[0], ioc_data->atm_addr[1], @@ -2260,8 +2254,8 @@ lec_vcc_added(struct lec_priv *priv, str if (memcmp (ioc_data->atm_addr, entry->atm_addr, ATM_ESA_LEN) == 0) { - DPRINTK("LEC_ARP: Attaching data direct\n"); - DPRINTK("Currently -> Vcc: %d, Rvcc:%d\n", + pr_debug("LEC_ARP: Attaching data direct\n"); + pr_debug("Currently -> Vcc: %d, Rvcc:%d\n", entry->vcc ? entry->vcc->vci : 0, entry->recv_vcc ? entry->recv_vcc-> vci : 0); @@ -2303,7 +2297,7 @@ lec_vcc_added(struct lec_priv *priv, str } } if (found_entry) { - DPRINTK("After vcc was added\n"); + pr_debug("After vcc was added\n"); dump_arp_table(priv); goto out; } @@ -2323,7 +2317,7 @@ lec_vcc_added(struct lec_priv *priv, str entry->timer.expires = jiffies + priv->vcc_timeout_period; entry->timer.function = lec_arp_expire_vcc; add_timer(&entry->timer); - DPRINTK("After vcc was added\n"); + pr_debug("After vcc was added\n"); dump_arp_table(priv); out: spin_unlock_irqrestore(&priv->lec_arp_lock, flags); @@ -2336,7 +2330,7 @@ static void lec_flush_complete(struct le struct lec_arp_table *entry; int i; - DPRINTK("LEC:lec_flush_complete %lx\n", tran_id); + pr_debug("LEC:lec_flush_complete %lx\n", tran_id); restart: spin_lock_irqsave(&priv->lec_arp_lock, flags); for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { @@ -2353,7 +2347,7 @@ restart: entry->last_used = jiffies; entry->status = ESI_FORWARD_DIRECT; lec_arp_put(entry); - DPRINTK("LEC_ARP: Flushed\n"); + pr_debug("LEC_ARP: Flushed\n"); goto restart; } } @@ -2376,7 +2370,7 @@ 
lec_set_flush_tran_id(struct lec_priv *p hlist_for_each_entry(entry, node, &priv->lec_arp_tables[i], next) { if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN)) { entry->flush_tran_id = tran_id; - DPRINTK("Set flush transaction id to %lx for %p\n", + pr_debug("Set flush transaction id to %lx for %p\n", tran_id, entry); } } @@ -2427,7 +2421,7 @@ static void lec_vcc_close(struct lec_pri struct lec_arp_table *entry; int i; - DPRINTK("LEC_ARP: lec_vcc_close vpi:%d vci:%d\n", vcc->vpi, vcc->vci); + pr_debug("LEC_ARP: lec_vcc_close vpi:%d vci:%d\n", vcc->vpi, vcc->vci); dump_arp_table(priv); spin_lock_irqsave(&priv->lec_arp_lock, flags); @@ -2510,7 +2504,7 @@ lec_arp_check_empties(struct lec_priv *p goto out; } } - DPRINTK("LEC_ARP: Arp_check_empties: entry not found!\n"); + pr_debug("LEC_ARP: Arp_check_empties: entry not found!\n"); out: spin_unlock_irqrestore(&priv->lec_arp_lock, flags); } diff -puN net/atm/pppoatm.c~git-net net/atm/pppoatm.c --- a/net/atm/pppoatm.c~git-net +++ a/net/atm/pppoatm.c @@ -46,13 +46,6 @@ #include "common.h" -#if 0 -#define DPRINTK(format, args...) \ - printk(KERN_DEBUG "pppoatm: " format, ##args) -#else -#define DPRINTK(format, args...) 
-#endif - enum pppoatm_encaps { e_autodetect = PPPOATM_ENCAPS_AUTODETECT, e_vc = PPPOATM_ENCAPS_VC, @@ -139,9 +132,9 @@ static void pppoatm_unassign_vcc(struct static void pppoatm_push(struct atm_vcc *atmvcc, struct sk_buff *skb) { struct pppoatm_vcc *pvcc = atmvcc_to_pvcc(atmvcc); - DPRINTK("pppoatm push\n"); + pr_debug("pppoatm push\n"); if (skb == NULL) { /* VCC was closed */ - DPRINTK("removing ATMPPP VCC %p\n", pvcc); + pr_debug("removing ATMPPP VCC %p\n", pvcc); pppoatm_unassign_vcc(atmvcc); atmvcc->push(atmvcc, NULL); /* Pass along bad news */ return; @@ -172,9 +165,8 @@ static void pppoatm_push(struct atm_vcc pvcc->chan.mtu += LLC_LEN; break; } - DPRINTK("(unit %d): Couldn't autodetect yet " + pr_debug("Couldn't autodetect yet " "(skb: %02X %02X %02X %02X %02X %02X)\n", - pvcc->chan.unit, skb->data[0], skb->data[1], skb->data[2], skb->data[3], skb->data[4], skb->data[5]); goto error; @@ -202,8 +194,7 @@ static int pppoatm_send(struct ppp_chann { struct pppoatm_vcc *pvcc = chan_to_pvcc(chan); ATM_SKB(skb)->vcc = pvcc->atmvcc; - DPRINTK("(unit %d): pppoatm_send (skb=0x%p, vcc=0x%p)\n", - pvcc->chan.unit, skb, pvcc->atmvcc); + pr_debug("pppoatm_send (skb=0x%p, vcc=0x%p)\n", skb, pvcc->atmvcc); if (skb->data[0] == '\0' && (pvcc->flags & SC_COMP_PROT)) (void) skb_pull(skb, 1); switch (pvcc->encaps) { /* LLC encapsulation needed */ @@ -228,16 +219,14 @@ static int pppoatm_send(struct ppp_chann goto nospace; break; case e_autodetect: - DPRINTK("(unit %d): Trying to send without setting encaps!\n", - pvcc->chan.unit); + pr_debug("Trying to send without setting encaps!\n"); kfree_skb(skb); return 1; } atomic_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc); ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options; - DPRINTK("(unit %d): atm_skb(%p)->vcc(%p)->dev(%p)\n", - pvcc->chan.unit, skb, ATM_SKB(skb)->vcc, + pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, ATM_SKB(skb)->vcc, ATM_SKB(skb)->vcc->dev); return 
ATM_SKB(skb)->vcc->send(ATM_SKB(skb)->vcc, skb) ? DROP_PACKET : 1; diff -puN net/atm/raw.c~git-net net/atm/raw.c --- a/net/atm/raw.c~git-net +++ a/net/atm/raw.c @@ -13,14 +13,6 @@ #include "common.h" #include "protocols.h" - -#if 0 -#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) -#else -#define DPRINTK(format,args...) -#endif - - /* * SKB == NULL indicates that the link is being closed */ @@ -40,8 +32,8 @@ static void atm_pop_raw(struct atm_vcc * { struct sock *sk = sk_atm(vcc); - DPRINTK("APopR (%d) %d -= %d\n", vcc->vci, sk->sk_wmem_alloc, - skb->truesize); + pr_debug("APopR (%d) %d -= %d\n", vcc->vci, + atomic_read(&sk->sk_wmem_alloc), skb->truesize); atomic_sub(skb->truesize, &sk->sk_wmem_alloc); dev_kfree_skb_any(skb); sk->sk_write_space(sk); diff -puN net/atm/signaling.c~git-net net/atm/signaling.c --- a/net/atm/signaling.c~git-net +++ a/net/atm/signaling.c @@ -23,13 +23,6 @@ Danger: may cause nasty hangs if the demon crashes. */ -#if 0 -#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) -#else -#define DPRINTK(format,args...) 
-#endif - - struct atm_vcc *sigd = NULL; #ifdef WAIT_FOR_DEMON static DECLARE_WAIT_QUEUE_HEAD(sigd_sleep); @@ -44,14 +37,14 @@ static void sigd_put_skb(struct sk_buff add_wait_queue(&sigd_sleep,&wait); while (!sigd) { set_current_state(TASK_UNINTERRUPTIBLE); - DPRINTK("atmsvc: waiting for signaling demon...\n"); + pr_debug("atmsvc: waiting for signaling demon...\n"); schedule(); } current->state = TASK_RUNNING; remove_wait_queue(&sigd_sleep,&wait); #else if (!sigd) { - DPRINTK("atmsvc: no signaling demon\n"); + pr_debug("atmsvc: no signaling demon\n"); kfree_skb(skb); return; } @@ -96,9 +89,9 @@ static int sigd_send(struct atm_vcc *vcc msg = (struct atmsvc_msg *) skb->data; atomic_sub(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); - DPRINTK("sigd_send %d (0x%lx)\n",(int) msg->type, - (unsigned long) msg->vcc); vcc = *(struct atm_vcc **) &msg->vcc; + pr_debug("sigd_send %d (0x%lx)\n",(int) msg->type, + (unsigned long) vcc); sk = sk_atm(vcc); switch (msg->type) { @@ -130,7 +123,7 @@ static int sigd_send(struct atm_vcc *vcc case as_indicate: vcc = *(struct atm_vcc **) &msg->listen_vcc; sk = sk_atm(vcc); - DPRINTK("as_indicate!!!\n"); + pr_debug("as_indicate!!!\n"); lock_sock(sk); if (sk_acceptq_is_full(sk)) { sigd_enq(NULL,as_reject,vcc,NULL,NULL); @@ -139,7 +132,7 @@ static int sigd_send(struct atm_vcc *vcc } sk->sk_ack_backlog++; skb_queue_tail(&sk->sk_receive_queue, skb); - DPRINTK("waking sk->sk_sleep 0x%p\n", sk->sk_sleep); + pr_debug("waking sk->sk_sleep 0x%p\n", sk->sk_sleep); sk->sk_state_change(sk); as_indicate_complete: release_sock(sk); @@ -176,7 +169,7 @@ void sigd_enq2(struct atm_vcc *vcc,enum struct atmsvc_msg *msg; static unsigned session = 0; - DPRINTK("sigd_enq %d (0x%p)\n",(int) type,vcc); + pr_debug("sigd_enq %d (0x%p)\n",(int) type,vcc); while (!(skb = alloc_skb(sizeof(struct atmsvc_msg),GFP_KERNEL))) schedule(); msg = (struct atmsvc_msg *) skb_put(skb,sizeof(struct atmsvc_msg)); @@ -226,7 +219,7 @@ static void sigd_close(struct atm_vcc *v struct 
sock *s; int i; - DPRINTK("sigd_close\n"); + pr_debug("sigd_close\n"); sigd = NULL; if (skb_peek(&sk_atm(vcc)->sk_receive_queue)) printk(KERN_ERR "sigd_close: closing with requests pending\n"); @@ -263,7 +256,7 @@ static struct atm_dev sigd_dev = { int sigd_attach(struct atm_vcc *vcc) { if (sigd) return -EADDRINUSE; - DPRINTK("sigd_attach\n"); + pr_debug("sigd_attach\n"); sigd = vcc; vcc->dev = &sigd_dev; vcc_insert_socket(sk_atm(vcc)); diff -puN net/atm/svc.c~git-net net/atm/svc.c --- a/net/atm/svc.c~git-net +++ a/net/atm/svc.c @@ -25,17 +25,8 @@ #include "signaling.h" #include "addr.h" - -#if 0 -#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) -#else -#define DPRINTK(format,args...) -#endif - - static int svc_create(struct socket *sock,int protocol); - /* * Note: since all this is still nicely synchronized with the signaling demon, * there's no need to protect sleep loops with clis. If signaling is @@ -55,7 +46,7 @@ static void svc_disconnect(struct atm_vc struct sk_buff *skb; struct sock *sk = sk_atm(vcc); - DPRINTK("svc_disconnect %p\n",vcc); + pr_debug("svc_disconnect %p\n",vcc); if (test_bit(ATM_VF_REGIS,&vcc->flags)) { prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE); sigd_enq(vcc,as_close,NULL,NULL,NULL); @@ -69,7 +60,7 @@ static void svc_disconnect(struct atm_vc as_indicate has been answered */ while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { atm_return(vcc, skb->truesize); - DPRINTK("LISTEN REL\n"); + pr_debug("LISTEN REL\n"); sigd_enq2(NULL,as_reject,vcc,NULL,NULL,&vcc->qos,0); dev_kfree_skb(skb); } @@ -85,7 +76,7 @@ static int svc_release(struct socket *so if (sk) { vcc = ATM_SD(sock); - DPRINTK("svc_release %p\n", vcc); + pr_debug("svc_release %p\n", vcc); clear_bit(ATM_VF_READY, &vcc->flags); /* VCC pointer is used as a reference, so we must not free it (thereby subjecting it to re-use) before all pending connections @@ -162,7 +153,7 @@ static int svc_connect(struct socket *so struct atm_vcc *vcc = ATM_SD(sock); 
int error; - DPRINTK("svc_connect %p\n",vcc); + pr_debug("svc_connect %p\n",vcc); lock_sock(sk); if (sockaddr_len != sizeof(struct sockaddr_atmsvc)) { error = -EINVAL; @@ -224,7 +215,7 @@ static int svc_connect(struct socket *so prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); continue; } - DPRINTK("*ABORT*\n"); + pr_debug("*ABORT*\n"); /* * This is tricky: * Kernel ---close--> Demon @@ -295,7 +286,7 @@ static int svc_listen(struct socket *soc struct atm_vcc *vcc = ATM_SD(sock); int error; - DPRINTK("svc_listen %p\n",vcc); + pr_debug("svc_listen %p\n",vcc); lock_sock(sk); /* let server handle listen on unbound sockets */ if (test_bit(ATM_VF_SESSION,&vcc->flags)) { @@ -341,7 +332,7 @@ static int svc_accept(struct socket *soc new_vcc = ATM_SD(newsock); - DPRINTK("svc_accept %p -> %p\n",old_vcc,new_vcc); + pr_debug("svc_accept %p -> %p\n",old_vcc,new_vcc); while (1) { DEFINE_WAIT(wait); @@ -545,7 +536,7 @@ static int svc_addparty(struct socket *s error = -EINPROGRESS; goto out; } - DPRINTK("svc_addparty added wait queue\n"); + pr_debug("svc_addparty added wait queue\n"); while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { schedule(); prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); diff -puN net/bridge/br_stp_bpdu.c~git-net net/bridge/br_stp_bpdu.c --- a/net/bridge/br_stp_bpdu.c~git-net +++ a/net/bridge/br_stp_bpdu.c @@ -64,7 +64,7 @@ static inline int br_get_ticks(const uns { unsigned long ticks = ntohs(get_unaligned((__be16 *)src)); - return (ticks * HZ + STP_HZ - 1) / STP_HZ; + return DIV_ROUND_UP(ticks * HZ, STP_HZ); } /* called under bridge lock */ diff -puN net/core/dev.c~git-net net/core/dev.c --- a/net/core/dev.c~git-net +++ a/net/core/dev.c @@ -220,7 +220,8 @@ static RAW_NOTIFIER_HEAD(netdev_chain); * Device drivers call our routines to queue packets here. We empty the * queue in the local softnet handler. 
*/ -DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL }; + +DEFINE_PER_CPU(struct softnet_data, softnet_data); #ifdef CONFIG_SYSFS extern int netdev_sysfs_init(void); @@ -1018,16 +1019,12 @@ int dev_close(struct net_device *dev) clear_bit(__LINK_STATE_START, &dev->state); /* Synchronize to scheduled poll. We cannot touch poll list, - * it can be even on different cpu. So just clear netif_running(), - * and wait when poll really will happen. Actually, the best place - * for this is inside dev->stop() after device stopped its irq - * engine, but this requires more changes in devices. */ - + * it can be even on different cpu. So just clear netif_running(). + * + * dev->stop() will invoke napi_disable() on all of its + * napi_struct instances on this device. + */ smp_mb__after_clear_bit(); /* Commit netif_running(). */ - while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) { - /* No hurry. */ - msleep(1); - } /* * Call the device specific close. This cannot fail. @@ -1233,21 +1230,21 @@ void __netif_schedule(struct net_device } EXPORT_SYMBOL(__netif_schedule); -void __netif_rx_schedule(struct net_device *dev) +void dev_kfree_skb_irq(struct sk_buff *skb) { - unsigned long flags; + if (atomic_dec_and_test(&skb->users)) { + struct softnet_data *sd; + unsigned long flags; - local_irq_save(flags); - dev_hold(dev); - list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list); - if (dev->quota < 0) - dev->quota += dev->weight; - else - dev->quota = dev->weight; - __raise_softirq_irqoff(NET_RX_SOFTIRQ); - local_irq_restore(flags); + local_irq_save(flags); + sd = &__get_cpu_var(softnet_data); + skb->next = sd->completion_queue; + sd->completion_queue = skb; + raise_softirq_irqoff(NET_TX_SOFTIRQ); + local_irq_restore(flags); + } } -EXPORT_SYMBOL(__netif_rx_schedule); +EXPORT_SYMBOL(dev_kfree_skb_irq); void dev_kfree_skb_any(struct sk_buff *skb) { @@ -1259,7 +1256,12 @@ void dev_kfree_skb_any(struct sk_buff *s EXPORT_SYMBOL(dev_kfree_skb_any); -/*
Hot-plugging. */ +/** + * netif_device_detach - mark device as removed + * @dev: network device + * + * Mark device as removed from system and therefore no longer available. + */ void netif_device_detach(struct net_device *dev) { if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && @@ -1269,6 +1271,12 @@ void netif_device_detach(struct net_devi } EXPORT_SYMBOL(netif_device_detach); +/** + * netif_device_attach - mark device as attached + * @dev: network device + * + * Mark device as attached from system and restart if needed. + */ void netif_device_attach(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && @@ -1730,7 +1738,7 @@ enqueue: return NET_RX_SUCCESS; } - netif_rx_schedule(&queue->backlog_dev); + napi_schedule(&queue->backlog); goto enqueue; } @@ -1771,6 +1779,7 @@ static inline struct net_device *skb_bon return dev; } + static void net_tx_action(struct softirq_action *h) { struct softnet_data *sd = &__get_cpu_var(softnet_data); @@ -1927,7 +1936,7 @@ int netif_receive_skb(struct sk_buff *sk __be16 type; /* if we've gotten here through NAPI, check netpoll */ - if (skb->dev->poll && netpoll_rx(skb)) + if (netpoll_receive_skb(skb)) return NET_RX_DROP; if (!skb->tstamp.tv64) @@ -2017,90 +2026,101 @@ out: return ret; } -static int process_backlog(struct net_device *backlog_dev, int *budget) +static int process_backlog(struct napi_struct *napi, int quota) { int work = 0; - int quota = min(backlog_dev->quota, *budget); struct softnet_data *queue = &__get_cpu_var(softnet_data); unsigned long start_time = jiffies; - backlog_dev->weight = weight_p; - for (;;) { + napi->weight = weight_p; + do { struct sk_buff *skb; struct net_device *dev; local_irq_disable(); skb = __skb_dequeue(&queue->input_pkt_queue); - if (!skb) - goto job_done; local_irq_enable(); + if (!skb) { + napi_complete(napi); + break; + } dev = skb->dev; netif_receive_skb(skb); dev_put(dev); + } while (++work < quota && jiffies == start_time); - work++; - - if 
(work >= quota || jiffies - start_time > 1) - break; + return work; +} - } +/** + * __napi_schedule - schedule for receive + * @n: entry to schedule + * + * The entry's receive function will be scheduled to run + */ +void fastcall __napi_schedule(struct napi_struct *n) +{ + unsigned long flags; - backlog_dev->quota -= work; - *budget -= work; - return -1; - -job_done: - backlog_dev->quota -= work; - *budget -= work; - - list_del(&backlog_dev->poll_list); - smp_mb__before_clear_bit(); - netif_poll_enable(backlog_dev); + if (n->quota < 0) + n->quota += n->weight; + else + n->quota = n->weight; - local_irq_enable(); - return 0; + local_irq_save(flags); + list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); + local_irq_restore(flags); } +EXPORT_SYMBOL(__napi_schedule); + static void net_rx_action(struct softirq_action *h) { - struct softnet_data *queue = &__get_cpu_var(softnet_data); + struct list_head list; unsigned long start_time = jiffies; int budget = netdev_budget; void *have; local_irq_disable(); + list_replace_init(&__get_cpu_var(softnet_data).poll_list, &list); + local_irq_enable(); - while (!list_empty(&queue->poll_list)) { - struct net_device *dev; + while (!list_empty(&list)) { + struct napi_struct *n; + + /* if softirq window is exhausted then punt */ + if (unlikely(budget <= 0 || jiffies != start_time)) { + local_irq_disable(); + list_splice(&list, &__get_cpu_var(softnet_data).poll_list); + __raise_softirq_irqoff(NET_RX_SOFTIRQ); + local_irq_enable(); + break; + } - if (budget <= 0 || jiffies - start_time > 1) - goto softnet_break; + n = list_entry(list.next, struct napi_struct, poll_list); - local_irq_enable(); + have = netpoll_poll_lock(n); - dev = list_entry(queue->poll_list.next, - struct net_device, poll_list); - have = netpoll_poll_lock(dev); + list_del(&n->poll_list); - if (dev->quota <= 0 || dev->poll(dev, &budget)) { - netpoll_poll_unlock(have); - local_irq_disable(); -
list_move_tail(&dev->poll_list, &queue->poll_list); - if (dev->quota < 0) - dev->quota += dev->weight; - else - dev->quota = dev->weight; - } else { - netpoll_poll_unlock(have); - dev_put(dev); - local_irq_disable(); + /* if quota not exhausted process work */ + if (likely(n->quota > 0)) { + int work = n->poll(n, min(budget, n->quota)); + + budget -= work; + n->quota -= work; } + + /* if napi_complete not called, reschedule */ + if (test_bit(NAPI_STATE_SCHED, &n->state)) + __napi_schedule(n); + + netpoll_poll_unlock(have); } -out: - local_irq_enable(); #ifdef CONFIG_NET_DMA /* * There may not be any more sk_buffs coming right now, so push @@ -2115,12 +2135,6 @@ out: } } #endif - return; - -softnet_break: - __get_cpu_var(netdev_rx_stat).time_squeeze++; - __raise_softirq_irqoff(NET_RX_SOFTIRQ); - goto out; } static gifconf_func_t * gifconf_list [NPROTO]; @@ -3704,6 +3718,7 @@ struct net_device *alloc_netdev_mq(int s dev->egress_subqueue_count = queue_count; dev->get_stats = internal_stats; + netpoll_netdev_init(dev); setup(dev); strcpy(dev->name, name); return dev; @@ -4076,10 +4091,9 @@ static int __init net_dev_init(void) skb_queue_head_init(&queue->input_pkt_queue); queue->completion_queue = NULL; INIT_LIST_HEAD(&queue->poll_list); - set_bit(__LINK_STATE_START, &queue->backlog_dev.state); - queue->backlog_dev.weight = weight_p; - queue->backlog_dev.poll = process_backlog; - atomic_set(&queue->backlog_dev.refcnt, 1); + + queue->backlog.poll = process_backlog; + queue->backlog.weight = weight_p; } netdev_dma_register(); diff -puN net/core/ethtool.c~git-net net/core/ethtool.c --- a/net/core/ethtool.c~git-net +++ a/net/core/ethtool.c @@ -109,6 +109,32 @@ int ethtool_op_set_ufo(struct net_device return 0; } +/* the following list of flags are the same as their associated + * NETIF_F_xxx values in include/linux/netdevice.h + */ +static const u32 flags_dup_features = + ETH_FLAG_LRO; + +u32 ethtool_op_get_flags(struct net_device *dev) +{ + /* in the future, this function 
will probably contain additional + * handling for flags which are not so easily handled + * by a simple masking operation + */ + + return dev->features & flags_dup_features; +} + +int ethtool_op_set_flags(struct net_device *dev, u32 data) +{ + if (data & ETH_FLAG_LRO) + dev->features |= NETIF_F_LRO; + else + dev->features &= ~NETIF_F_LRO; + + return 0; +} + /* Handlers for each ethtool command */ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) @@ -153,10 +179,26 @@ static int ethtool_get_drvinfo(struct ne info.cmd = ETHTOOL_GDRVINFO; ops->get_drvinfo(dev, &info); - if (ops->self_test_count) - info.testinfo_len = ops->self_test_count(dev); - if (ops->get_stats_count) - info.n_stats = ops->get_stats_count(dev); + if (ops->get_sset_count) { + int rc; + + rc = ops->get_sset_count(dev, ETH_SS_TEST); + if (rc >= 0) + info.testinfo_len = rc; + rc = ops->get_sset_count(dev, ETH_SS_STATS); + if (rc >= 0) + info.n_stats = rc; + rc = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS); + if (rc >= 0) + info.n_priv_flags = rc; + } else { + /* code path for obsolete hooks */ + + if (ops->self_test_count) + info.testinfo_len = ops->self_test_count(dev); + if (ops->get_stats_count) + info.n_stats = ops->get_stats_count(dev); + } if (ops->get_regs_len) info.regdump_len = ops->get_regs_len(dev); if (ops->get_eeprom_len) @@ -230,34 +272,6 @@ static int ethtool_set_wol(struct net_de return dev->ethtool_ops->set_wol(dev, &wol); } -static int ethtool_get_msglevel(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GMSGLVL }; - - if (!dev->ethtool_ops->get_msglevel) - return -EOPNOTSUPP; - - edata.data = dev->ethtool_ops->get_msglevel(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_msglevel(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata; - - if (!dev->ethtool_ops->set_msglevel) - return -EOPNOTSUPP; - - if 
(copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - dev->ethtool_ops->set_msglevel(dev, edata.data); - return 0; -} - static int ethtool_nway_reset(struct net_device *dev) { if (!dev->ethtool_ops->nway_reset) @@ -266,20 +280,6 @@ static int ethtool_nway_reset(struct net return dev->ethtool_ops->nway_reset(dev); } -static int ethtool_get_link(struct net_device *dev, void __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GLINK }; - - if (!dev->ethtool_ops->get_link) - return -EOPNOTSUPP; - - edata.data = dev->ethtool_ops->get_link(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) { struct ethtool_eeprom eeprom; @@ -447,48 +447,6 @@ static int ethtool_set_pauseparam(struct return dev->ethtool_ops->set_pauseparam(dev, &pauseparam); } -static int ethtool_get_rx_csum(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GRXCSUM }; - - if (!dev->ethtool_ops->get_rx_csum) - return -EOPNOTSUPP; - - edata.data = dev->ethtool_ops->get_rx_csum(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - -static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata; - - if (!dev->ethtool_ops->set_rx_csum) - return -EOPNOTSUPP; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - - dev->ethtool_ops->set_rx_csum(dev, edata.data); - return 0; -} - -static int ethtool_get_tx_csum(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GTXCSUM }; - - if (!dev->ethtool_ops->get_tx_csum) - return -EOPNOTSUPP; - - edata.data = dev->ethtool_ops->get_tx_csum(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - static int __ethtool_set_sg(struct net_device *dev, u32 data) { int err; @@ -527,20 +485,6 @@ static int 
ethtool_set_tx_csum(struct ne return dev->ethtool_ops->set_tx_csum(dev, edata.data); } -static int ethtool_get_sg(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GSG }; - - if (!dev->ethtool_ops->get_sg) - return -EOPNOTSUPP; - - edata.data = dev->ethtool_ops->get_sg(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - static int ethtool_set_sg(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata; @@ -558,20 +502,6 @@ static int ethtool_set_sg(struct net_dev return __ethtool_set_sg(dev, edata.data); } -static int ethtool_get_tso(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GTSO }; - - if (!dev->ethtool_ops->get_tso) - return -EOPNOTSUPP; - - edata.data = dev->ethtool_ops->get_tso(dev); - - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - static int ethtool_set_tso(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata; @@ -588,18 +518,6 @@ static int ethtool_set_tso(struct net_de return dev->ethtool_ops->set_tso(dev, edata.data); } -static int ethtool_get_ufo(struct net_device *dev, char __user *useraddr) -{ - struct ethtool_value edata = { ETHTOOL_GUFO }; - - if (!dev->ethtool_ops->get_ufo) - return -EOPNOTSUPP; - edata.data = dev->ethtool_ops->get_ufo(dev); - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - return 0; -} - static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata; @@ -643,16 +561,27 @@ static int ethtool_self_test(struct net_ struct ethtool_test test; const struct ethtool_ops *ops = dev->ethtool_ops; u64 *data; - int ret; + int ret, test_len; - if (!ops->self_test || !ops->self_test_count) + if (!ops->self_test) + return -EOPNOTSUPP; + if (!ops->get_sset_count && !ops->self_test_count) return -EOPNOTSUPP; + if (ops->get_sset_count) + test_len = ops->get_sset_count(dev, 
ETH_SS_TEST); + else + /* code path for obsolete hook */ + test_len = ops->self_test_count(dev); + if (test_len < 0) + return test_len; + WARN_ON(test_len == 0); + if (copy_from_user(&test, useraddr, sizeof(test))) return -EFAULT; - test.len = ops->self_test_count(dev); - data = kmalloc(test.len * sizeof(u64), GFP_USER); + test.len = test_len; + data = kmalloc(test_len * sizeof(u64), GFP_USER); if (!data) return -ENOMEM; @@ -684,19 +613,29 @@ static int ethtool_get_strings(struct ne if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) return -EFAULT; - switch (gstrings.string_set) { - case ETH_SS_TEST: - if (!ops->self_test_count) - return -EOPNOTSUPP; - gstrings.len = ops->self_test_count(dev); - break; - case ETH_SS_STATS: - if (!ops->get_stats_count) - return -EOPNOTSUPP; - gstrings.len = ops->get_stats_count(dev); - break; - default: - return -EINVAL; + if (ops->get_sset_count) { + ret = ops->get_sset_count(dev, gstrings.string_set); + if (ret < 0) + return ret; + + gstrings.len = ret; + } else { + /* code path for obsolete hooks */ + + switch (gstrings.string_set) { + case ETH_SS_TEST: + if (!ops->self_test_count) + return -EOPNOTSUPP; + gstrings.len = ops->self_test_count(dev); + break; + case ETH_SS_STATS: + if (!ops->get_stats_count) + return -EOPNOTSUPP; + gstrings.len = ops->get_stats_count(dev); + break; + default: + return -EINVAL; + } } data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); @@ -736,16 +675,27 @@ static int ethtool_get_stats(struct net_ struct ethtool_stats stats; const struct ethtool_ops *ops = dev->ethtool_ops; u64 *data; - int ret; + int ret, n_stats; - if (!ops->get_ethtool_stats || !ops->get_stats_count) + if (!ops->get_ethtool_stats) + return -EOPNOTSUPP; + if (!ops->get_sset_count && !ops->get_stats_count) return -EOPNOTSUPP; + if (ops->get_sset_count) + n_stats = ops->get_sset_count(dev, ETH_SS_STATS); + else + /* code path for obsolete hook */ + n_stats = ops->get_stats_count(dev); + if (n_stats < 0) + return n_stats; 
+ WARN_ON(n_stats == 0); + if (copy_from_user(&stats, useraddr, sizeof(stats))) return -EFAULT; - stats.n_stats = ops->get_stats_count(dev); - data = kmalloc(stats.n_stats * sizeof(u64), GFP_USER); + stats.n_stats = n_stats; + data = kmalloc(n_stats * sizeof(u64), GFP_USER); if (!data) return -ENOMEM; @@ -783,6 +733,50 @@ static int ethtool_get_perm_addr(struct return 0; } +static int ethtool_get_value(struct net_device *dev, char __user *useraddr, + u32 cmd, u32 (*actor)(struct net_device *)) +{ + struct ethtool_value edata = { cmd }; + + if (!actor) + return -EOPNOTSUPP; + + edata.data = actor(dev); + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + +static int ethtool_set_value_void(struct net_device *dev, char __user *useraddr, + void (*actor)(struct net_device *, u32)) +{ + struct ethtool_value edata; + + if (!actor) + return -EOPNOTSUPP; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + actor(dev, edata.data); + return 0; +} + +static int ethtool_set_value(struct net_device *dev, char __user *useraddr, + int (*actor)(struct net_device *, u32)) +{ + struct ethtool_value edata; + + if (!actor) + return -EOPNOTSUPP; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + return actor(dev, edata.data); +} + /* The main entry point in this file. 
Called from net/core/dev.c */ int dev_ethtool(struct ifreq *ifr) @@ -817,6 +811,8 @@ int dev_ethtool(struct ifreq *ifr) case ETHTOOL_GPERMADDR: case ETHTOOL_GUFO: case ETHTOOL_GGSO: + case ETHTOOL_GFLAGS: + case ETHTOOL_GPFLAGS: break; default: if (!capable(CAP_NET_ADMIN)) @@ -849,16 +845,19 @@ int dev_ethtool(struct ifreq *ifr) rc = ethtool_set_wol(dev, useraddr); break; case ETHTOOL_GMSGLVL: - rc = ethtool_get_msglevel(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + dev->ethtool_ops->get_msglevel); break; case ETHTOOL_SMSGLVL: - rc = ethtool_set_msglevel(dev, useraddr); + rc = ethtool_set_value_void(dev, useraddr, + dev->ethtool_ops->set_msglevel); break; case ETHTOOL_NWAY_RST: rc = ethtool_nway_reset(dev); break; case ETHTOOL_GLINK: - rc = ethtool_get_link(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + dev->ethtool_ops->get_link); break; case ETHTOOL_GEEPROM: rc = ethtool_get_eeprom(dev, useraddr); @@ -885,25 +884,30 @@ int dev_ethtool(struct ifreq *ifr) rc = ethtool_set_pauseparam(dev, useraddr); break; case ETHTOOL_GRXCSUM: - rc = ethtool_get_rx_csum(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + dev->ethtool_ops->get_rx_csum); break; case ETHTOOL_SRXCSUM: - rc = ethtool_set_rx_csum(dev, useraddr); + rc = ethtool_set_value(dev, useraddr, + dev->ethtool_ops->set_rx_csum); break; case ETHTOOL_GTXCSUM: - rc = ethtool_get_tx_csum(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + dev->ethtool_ops->get_tx_csum); break; case ETHTOOL_STXCSUM: rc = ethtool_set_tx_csum(dev, useraddr); break; case ETHTOOL_GSG: - rc = ethtool_get_sg(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + dev->ethtool_ops->get_sg); break; case ETHTOOL_SSG: rc = ethtool_set_sg(dev, useraddr); break; case ETHTOOL_GTSO: - rc = ethtool_get_tso(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + dev->ethtool_ops->get_tso); break; case ETHTOOL_STSO: rc = ethtool_set_tso(dev, useraddr); @@ -924,7 
+928,8 @@ int dev_ethtool(struct ifreq *ifr) rc = ethtool_get_perm_addr(dev, useraddr); break; case ETHTOOL_GUFO: - rc = ethtool_get_ufo(dev, useraddr); + rc = ethtool_get_value(dev, useraddr, ethcmd, + dev->ethtool_ops->get_ufo); break; case ETHTOOL_SUFO: rc = ethtool_set_ufo(dev, useraddr); @@ -935,6 +940,22 @@ int dev_ethtool(struct ifreq *ifr) case ETHTOOL_SGSO: rc = ethtool_set_gso(dev, useraddr); break; + case ETHTOOL_GFLAGS: + rc = ethtool_get_value(dev, useraddr, ethcmd, + dev->ethtool_ops->get_flags); + break; + case ETHTOOL_SFLAGS: + rc = ethtool_set_value(dev, useraddr, + dev->ethtool_ops->set_flags); + break; + case ETHTOOL_GPFLAGS: + rc = ethtool_get_value(dev, useraddr, ethcmd, + dev->ethtool_ops->get_priv_flags); + break; + case ETHTOOL_SPFLAGS: + rc = ethtool_set_value(dev, useraddr, + dev->ethtool_ops->set_priv_flags); + break; default: rc = -EOPNOTSUPP; } @@ -959,3 +980,5 @@ EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum) EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum); EXPORT_SYMBOL(ethtool_op_set_ufo); EXPORT_SYMBOL(ethtool_op_get_ufo); +EXPORT_SYMBOL(ethtool_op_set_flags); +EXPORT_SYMBOL(ethtool_op_get_flags); diff -puN net/core/neighbour.c~git-net net/core/neighbour.c --- a/net/core/neighbour.c~git-net +++ a/net/core/neighbour.c @@ -55,9 +55,8 @@ #define PNEIGH_HASHMASK 0xF static void neigh_timer_handler(unsigned long arg); -#ifdef CONFIG_ARPD -static void neigh_app_notify(struct neighbour *n); -#endif +static void __neigh_notify(struct neighbour *n, int type, int flags); +static void neigh_update_notify(struct neighbour *neigh); static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev); void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); @@ -105,6 +104,15 @@ static int neigh_blackhole(struct sk_buf return -ENETDOWN; } +static void neigh_cleanup_and_release(struct neighbour *neigh) +{ + if (neigh->parms->neigh_cleanup) + neigh->parms->neigh_cleanup(neigh); + + __neigh_notify(neigh, RTM_DELNEIGH, 0); + 
neigh_release(neigh); +} + /* * It is random distribution in the interval (1/2)*base...(3/2)*base. * It corresponds to default IPv6 settings and is not overridable, @@ -141,9 +149,7 @@ static int neigh_forced_gc(struct neigh_ n->dead = 1; shrunk = 1; write_unlock(&n->lock); - if (n->parms->neigh_cleanup) - n->parms->neigh_cleanup(n); - neigh_release(n); + neigh_cleanup_and_release(n); continue; } write_unlock(&n->lock); @@ -214,9 +220,7 @@ static void neigh_flush_dev(struct neigh NEIGH_PRINTK2("neigh %p is stray.\n", n); } write_unlock(&n->lock); - if (n->parms->neigh_cleanup) - n->parms->neigh_cleanup(n); - neigh_release(n); + neigh_cleanup_and_release(n); } } } @@ -677,9 +681,7 @@ static void neigh_periodic_timer(unsigne *np = n->next; n->dead = 1; write_unlock(&n->lock); - if (n->parms->neigh_cleanup) - n->parms->neigh_cleanup(n); - neigh_release(n); + neigh_cleanup_and_release(n); continue; } write_unlock(&n->lock); @@ -828,13 +830,10 @@ static void neigh_timer_handler(unsigned out: write_unlock(&neigh->lock); } + if (notify) - call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); + neigh_update_notify(neigh); -#ifdef CONFIG_ARPD - if (notify && neigh->parms->app_probes) - neigh_app_notify(neigh); -#endif neigh_release(neigh); } @@ -1063,11 +1062,8 @@ out: write_unlock_bh(&neigh->lock); if (notify) - call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); -#ifdef CONFIG_ARPD - if (notify && neigh->parms->app_probes) - neigh_app_notify(neigh); -#endif + neigh_update_notify(neigh); + return err; } @@ -2000,6 +1996,11 @@ nla_put_failure: return -EMSGSIZE; } +static void neigh_update_notify(struct neighbour *neigh) +{ + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); + __neigh_notify(neigh, RTM_NEWNEIGH, 0); +} static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb) @@ -2095,11 +2096,8 @@ void __neigh_for_each_release(struct nei } else np = &n->next; write_unlock(&n->lock); - if (release) { - if 
(n->parms->neigh_cleanup) - n->parms->neigh_cleanup(n); - neigh_release(n); - } + if (release) + neigh_cleanup_and_release(n); } } } @@ -2422,7 +2420,6 @@ static const struct file_operations neig #endif /* CONFIG_PROC_FS */ -#ifdef CONFIG_ARPD static inline size_t neigh_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ndmsg)) @@ -2454,16 +2451,11 @@ errout: rtnl_set_sk_err(RTNLGRP_NEIGH, err); } +#ifdef CONFIG_ARPD void neigh_app_ns(struct neighbour *n) { __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST); } - -static void neigh_app_notify(struct neighbour *n) -{ - __neigh_notify(n, RTM_NEWNEIGH, 0); -} - #endif /* CONFIG_ARPD */ #ifdef CONFIG_SYSCTL diff -puN net/core/net-sysfs.c~git-net net/core/net-sysfs.c --- a/net/core/net-sysfs.c~git-net +++ a/net/core/net-sysfs.c @@ -216,20 +216,6 @@ static ssize_t store_tx_queue_len(struct return netdev_store(dev, attr, buf, len, change_tx_queue_len); } -NETDEVICE_SHOW(weight, fmt_dec); - -static int change_weight(struct net_device *net, unsigned long new_weight) -{ - net->weight = new_weight; - return 0; -} - -static ssize_t store_weight(struct device *dev, struct device_attribute *attr, - const char *buf, size_t len) -{ - return netdev_store(dev, attr, buf, len, change_weight); -} - static struct device_attribute net_class_attributes[] = { __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), __ATTR(iflink, S_IRUGO, show_iflink, NULL), @@ -246,7 +232,6 @@ static struct device_attribute net_class __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags), __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, store_tx_queue_len), - __ATTR(weight, S_IRUGO | S_IWUSR, show_weight, store_weight), {} }; diff -puN net/core/netpoll.c~git-net net/core/netpoll.c --- a/net/core/netpoll.c~git-net +++ a/net/core/netpoll.c @@ -119,19 +119,22 @@ static __sum16 checksum_udp(struct sk_bu static void poll_napi(struct netpoll *np) { struct netpoll_info *npinfo = np->dev->npinfo; + struct napi_struct *napi; int budget = 16; - if 
(test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) && - npinfo->poll_owner != smp_processor_id() && - spin_trylock(&npinfo->poll_lock)) { - npinfo->rx_flags |= NETPOLL_RX_DROP; - atomic_inc(&trapped); - - np->dev->poll(np->dev, &budget); - - atomic_dec(&trapped); - npinfo->rx_flags &= ~NETPOLL_RX_DROP; - spin_unlock(&npinfo->poll_lock); + list_for_each_entry(napi, &np->dev->napi_list, dev_list) { + if (test_bit(NAPI_STATE_SCHED, &napi->state) && + napi->poll_owner != smp_processor_id() && + spin_trylock(&napi->poll_lock)) { + npinfo->rx_flags |= NETPOLL_RX_DROP; + atomic_inc(&trapped); + + napi->poll(napi, budget); + + atomic_dec(&trapped); + npinfo->rx_flags &= ~NETPOLL_RX_DROP; + spin_unlock(&napi->poll_lock); + } } } @@ -157,7 +160,7 @@ void netpoll_poll(struct netpoll *np) /* Process pending work on NIC */ np->dev->poll_controller(np->dev); - if (np->dev->poll) + if (!list_empty(&np->dev->napi_list)) poll_napi(np); service_arp_queue(np->dev->npinfo); @@ -233,6 +236,17 @@ repeat: return skb; } +static int netpoll_owner_active(struct net_device *dev) +{ + struct napi_struct *napi; + + list_for_each_entry(napi, &dev->napi_list, dev_list) { + if (napi->poll_owner == smp_processor_id()) + return 1; + } + return 0; +} + static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { int status = NETDEV_TX_BUSY; @@ -246,8 +260,7 @@ static void netpoll_send_skb(struct netp } /* don't get messages out of order, and no recursion */ - if (skb_queue_len(&npinfo->txq) == 0 && - npinfo->poll_owner != smp_processor_id()) { + if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) { unsigned long flags; local_irq_save(flags); @@ -519,6 +532,29 @@ out: return 0; } +void netpoll_print_options(struct netpoll *np) +{ + printk(KERN_INFO "%s: local port %d\n", + np->name, np->local_port); + printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n", + np->name, HIPQUAD(np->local_ip)); + printk(KERN_INFO "%s: interface %s\n", + np->name, np->dev_name); + printk(KERN_INFO "%s: 
remote port %d\n", + np->name, np->remote_port); + printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n", + np->name, HIPQUAD(np->remote_ip)); + printk(KERN_INFO "%s: remote ethernet address " + "%02x:%02x:%02x:%02x:%02x:%02x\n", + np->name, + np->remote_mac[0], + np->remote_mac[1], + np->remote_mac[2], + np->remote_mac[3], + np->remote_mac[4], + np->remote_mac[5]); +} + int netpoll_parse_options(struct netpoll *np, char *opt) { char *cur=opt, *delim; @@ -531,7 +567,6 @@ int netpoll_parse_options(struct netpoll cur = delim; } cur++; - printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port); if (*cur != '/') { if ((delim = strchr(cur, '/')) == NULL) @@ -539,9 +574,6 @@ int netpoll_parse_options(struct netpoll *delim = 0; np->local_ip = ntohl(in_aton(cur)); cur = delim; - - printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n", - np->name, HIPQUAD(np->local_ip)); } cur++; @@ -555,8 +587,6 @@ int netpoll_parse_options(struct netpoll } cur++; - printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name); - if (*cur != '@') { /* dst port */ if ((delim = strchr(cur, '@')) == NULL) @@ -566,7 +596,6 @@ int netpoll_parse_options(struct netpoll cur = delim; } cur++; - printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port); /* dst ip */ if ((delim = strchr(cur, '/')) == NULL) @@ -575,9 +604,6 @@ int netpoll_parse_options(struct netpoll np->remote_ip = ntohl(in_aton(cur)); cur = delim + 1; - printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n", - np->name, HIPQUAD(np->remote_ip)); - if (*cur != 0) { /* MAC address */ if ((delim = strchr(cur, ':')) == NULL) @@ -608,15 +634,7 @@ int netpoll_parse_options(struct netpoll np->remote_mac[5] = simple_strtol(cur, NULL, 16); } - printk(KERN_INFO "%s: remote ethernet address " - "%02x:%02x:%02x:%02x:%02x:%02x\n", - np->name, - np->remote_mac[0], - np->remote_mac[1], - np->remote_mac[2], - np->remote_mac[3], - np->remote_mac[4], - np->remote_mac[5]); + netpoll_print_options(np); return 0; @@ -652,8 +670,6 @@ int netpoll_setup(struct 
netpoll *np) npinfo->rx_flags = 0; npinfo->rx_np = NULL; - spin_lock_init(&npinfo->poll_lock); - npinfo->poll_owner = -1; spin_lock_init(&npinfo->rx_lock); skb_queue_head_init(&npinfo->arp_tx); @@ -820,6 +836,7 @@ void netpoll_set_trap(int trap) EXPORT_SYMBOL(netpoll_set_trap); EXPORT_SYMBOL(netpoll_trap); +EXPORT_SYMBOL(netpoll_print_options); EXPORT_SYMBOL(netpoll_parse_options); EXPORT_SYMBOL(netpoll_setup); EXPORT_SYMBOL(netpoll_cleanup); diff -puN net/core/pktgen.c~git-net net/core/pktgen.c --- a/net/core/pktgen.c~git-net +++ a/net/core/pktgen.c @@ -164,7 +164,7 @@ #include /* do_div */ #include -#define VERSION "pktgen v2.68: Packet Generator for packet performance testing.\n" +#define VERSION "pktgen v2.69: Packet Generator for packet performance testing.\n" /* The buckets are exponential in 'width' */ #define LAT_BUCKETS_MAX 32 @@ -186,6 +186,7 @@ #define F_SVID_RND (1<<10) /* Random SVLAN ID */ #define F_FLOW_SEQ (1<<11) /* Sequential flows */ #define F_IPSEC_ON (1<<12) /* ipsec on for flows */ +#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */ /* Thread control flag bits */ #define T_TERMINATE (1<<0) @@ -328,6 +329,7 @@ struct pktgen_dev { __be32 cur_daddr; __u16 cur_udp_dst; __u16 cur_udp_src; + __u16 cur_queue_map; __u32 cur_pkt_size; __u8 hh[14]; @@ -355,6 +357,10 @@ struct pktgen_dev { unsigned lflow; /* Flow length (config) */ unsigned nflows; /* accumulated flows (stats) */ unsigned curfl; /* current sequenced flow (state)*/ + + u16 queue_map_min; + u16 queue_map_max; + #ifdef CONFIG_XFRM __u8 ipsmode; /* IPSEC mode (config) */ __u8 ipsproto; /* IPSEC type (config) */ @@ -375,7 +381,6 @@ struct pktgen_thread { struct list_head th_list; struct task_struct *tsk; char result[512]; - u32 max_before_softirq; /* We'll call do_softirq to prevent starvation. */ /* Field for thread to receive "posted" events terminate, stop ifs etc. 
*/ @@ -610,6 +615,11 @@ static int pktgen_if_show(struct seq_fil seq_printf(seq, " flows: %u flowlen: %u\n", pkt_dev->cflows, pkt_dev->lflow); + seq_printf(seq, + " queue_map_min: %u queue_map_max: %u\n", + pkt_dev->queue_map_min, + pkt_dev->queue_map_max); + if (pkt_dev->flags & F_IPV6) { char b1[128], b2[128], b3[128]; fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr); @@ -706,6 +716,9 @@ static int pktgen_if_show(struct seq_fil if (pkt_dev->flags & F_MPLS_RND) seq_printf(seq, "MPLS_RND "); + if (pkt_dev->flags & F_QUEUE_MAP_RND) + seq_printf(seq, "QUEUE_MAP_RND "); + if (pkt_dev->cflows) { if (pkt_dev->flags & F_FLOW_SEQ) seq_printf(seq, "FLOW_SEQ "); /*in sequence flows*/ @@ -761,6 +774,8 @@ static int pktgen_if_show(struct seq_fil seq_printf(seq, " cur_udp_dst: %d cur_udp_src: %d\n", pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src); + seq_printf(seq, " cur_queue_map: %u\n", pkt_dev->cur_queue_map); + seq_printf(seq, " flows: %u\n", pkt_dev->nflows); if (pkt_dev->result[0]) @@ -1212,6 +1227,11 @@ static ssize_t pktgen_if_write(struct fi else if (strcmp(f, "FLOW_SEQ") == 0) pkt_dev->flags |= F_FLOW_SEQ; + else if (strcmp(f, "QUEUE_MAP_RND") == 0) + pkt_dev->flags |= F_QUEUE_MAP_RND; + + else if (strcmp(f, "!QUEUE_MAP_RND") == 0) + pkt_dev->flags &= ~F_QUEUE_MAP_RND; #ifdef CONFIG_XFRM else if (strcmp(f, "IPSEC") == 0) pkt_dev->flags |= F_IPSEC_ON; @@ -1516,6 +1536,28 @@ static ssize_t pktgen_if_write(struct fi return count; } + if (!strcmp(name, "queue_map_min")) { + len = num_arg(&user_buffer[i], 5, &value); + if (len < 0) { + return len; + } + i += len; + pkt_dev->queue_map_min = value; + sprintf(pg_result, "OK: queue_map_min=%u", pkt_dev->queue_map_min); + return count; + } + + if (!strcmp(name, "queue_map_max")) { + len = num_arg(&user_buffer[i], 5, &value); + if (len < 0) { + return len; + } + i += len; + pkt_dev->queue_map_max = value; + sprintf(pg_result, "OK: queue_map_max=%u", pkt_dev->queue_map_max); + return count; + } + if (!strcmp(name, "mpls")) { unsigned n, 
offset; len = get_labels(&user_buffer[i], pkt_dev); @@ -1708,9 +1750,6 @@ static int pktgen_thread_show(struct seq BUG_ON(!t); - seq_printf(seq, "Name: %s max_before_softirq: %d\n", - t->tsk->comm, t->max_before_softirq); - seq_printf(seq, "Running: "); if_lock(t); @@ -1743,7 +1782,6 @@ static ssize_t pktgen_thread_write(struc int i = 0, max, len, ret; char name[40]; char *pg_result; - unsigned long value = 0; if (count < 1) { // sprintf(pg_result, "Wrong command format"); @@ -1817,12 +1855,8 @@ static ssize_t pktgen_thread_write(struc } if (!strcmp(name, "max_before_softirq")) { - len = num_arg(&user_buffer[i], 10, &value); - mutex_lock(&pktgen_thread_lock); - t->max_before_softirq = value; - mutex_unlock(&pktgen_thread_lock); + sprintf(pg_result, "OK: Note! max_before_softirq is obsoleted -- Do not use"); ret = count; - sprintf(pg_result, "OK: max_before_softirq=%lu", value); goto out; } @@ -2101,7 +2135,6 @@ static void spin(struct pktgen_dev *pkt_ if (spin_until_us - now > jiffies_to_usecs(1) + 1) schedule_timeout_interruptible(1); else if (spin_until_us - now > 100) { - do_softirq(); if (!pkt_dev->running) return; if (need_resched()) @@ -2377,6 +2410,20 @@ static void mod_cur_headers(struct pktge pkt_dev->cur_pkt_size = t; } + if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) { + __u16 t; + if (pkt_dev->flags & F_QUEUE_MAP_RND) { + t = random32() % + (pkt_dev->queue_map_max - pkt_dev->queue_map_min + 1) + + pkt_dev->queue_map_min; + } else { + t = pkt_dev->cur_queue_map + 1; + if (t > pkt_dev->queue_map_max) + t = pkt_dev->queue_map_min; + } + pkt_dev->cur_queue_map = t; + } + pkt_dev->flows[flow].count++; } @@ -2547,6 +2594,7 @@ static struct sk_buff *fill_packet_ipv4( skb->network_header = skb->tail; skb->transport_header = skb->network_header + sizeof(struct iphdr); skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr)); + skb->queue_mapping = pkt_dev->cur_queue_map; iph = ip_hdr(skb); udph = udp_hdr(skb); @@ -2888,6 +2936,7 @@ static struct sk_buff 
*fill_packet_ipv6( skb->network_header = skb->tail; skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); + skb->queue_mapping = pkt_dev->cur_queue_map; iph = ipv6_hdr(skb); udph = udp_hdr(skb); @@ -3455,8 +3504,6 @@ static int pktgen_thread_worker(void *ar struct pktgen_thread *t = arg; struct pktgen_dev *pkt_dev = NULL; int cpu = t->cpu; - u32 max_before_softirq; - u32 tx_since_softirq = 0; BUG_ON(smp_processor_id() != cpu); @@ -3464,8 +3511,6 @@ static int pktgen_thread_worker(void *ar pr_debug("pktgen: starting pktgen/%d: pid=%d\n", cpu, current->pid); - max_before_softirq = t->max_before_softirq; - set_current_state(TASK_INTERRUPTIBLE); set_freezable(); @@ -3484,24 +3529,9 @@ static int pktgen_thread_worker(void *ar __set_current_state(TASK_RUNNING); - if (pkt_dev) { - + if (pkt_dev) pktgen_xmit(pkt_dev); - /* - * We like to stay RUNNING but must also give - * others fair share. - */ - - tx_since_softirq += pkt_dev->last_ok; - - if (tx_since_softirq > max_before_softirq) { - if (local_softirq_pending()) - do_softirq(); - tx_since_softirq = 0; - } - } - if (t->control & T_STOP) { pktgen_stop(t); t->control &= ~(T_STOP); diff -puN net/core/rtnetlink.c~git-net net/core/rtnetlink.c --- a/net/core/rtnetlink.c~git-net +++ a/net/core/rtnetlink.c @@ -634,7 +634,6 @@ static int rtnl_fill_ifinfo(struct sk_bu NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); NLA_PUT_U32(skb, IFLA_TXQLEN, dev->tx_queue_len); - NLA_PUT_U32(skb, IFLA_WEIGHT, dev->weight); NLA_PUT_U8(skb, IFLA_OPERSTATE, netif_running(dev) ? 
dev->operstate : IF_OPER_DOWN); NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode); @@ -714,7 +713,7 @@ cont: return skb->len; } -static const struct nla_policy ifla_policy[IFLA_MAX+1] = { +const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 }, [IFLA_ADDRESS] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [IFLA_BROADCAST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, @@ -834,9 +833,6 @@ static int do_setlink(struct net_device if (tb[IFLA_TXQLEN]) dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); - if (tb[IFLA_WEIGHT]) - dev->weight = nla_get_u32(tb[IFLA_WEIGHT]); - if (tb[IFLA_OPERSTATE]) set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); @@ -941,6 +937,48 @@ static int rtnl_dellink(struct sk_buff * return 0; } +struct net_device *rtnl_create_link(char *ifname, + const struct rtnl_link_ops *ops, struct nlattr *tb[]) +{ + int err; + struct net_device *dev; + + err = -ENOMEM; + dev = alloc_netdev(ops->priv_size, ifname, ops->setup); + if (!dev) + goto err; + + if (strchr(dev->name, '%')) { + err = dev_alloc_name(dev, dev->name); + if (err < 0) + goto err_free; + } + + dev->rtnl_link_ops = ops; + + if (tb[IFLA_MTU]) + dev->mtu = nla_get_u32(tb[IFLA_MTU]); + if (tb[IFLA_ADDRESS]) + memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), + nla_len(tb[IFLA_ADDRESS])); + if (tb[IFLA_BROADCAST]) + memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]), + nla_len(tb[IFLA_BROADCAST])); + if (tb[IFLA_TXQLEN]) + dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); + if (tb[IFLA_OPERSTATE]) + set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); + if (tb[IFLA_LINKMODE]) + dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); + + return dev; + +err_free: + free_netdev(dev); +err: + return ERR_PTR(err); +} + static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { const struct rtnl_link_ops *ops; @@ -1053,40 +1091,17 @@ replay: if (!ifname[0]) snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); - dev = 
alloc_netdev(ops->priv_size, ifname, ops->setup); - if (!dev) - return -ENOMEM; - - if (strchr(dev->name, '%')) { - err = dev_alloc_name(dev, dev->name); - if (err < 0) - goto err_free; - } - dev->rtnl_link_ops = ops; - if (tb[IFLA_MTU]) - dev->mtu = nla_get_u32(tb[IFLA_MTU]); - if (tb[IFLA_ADDRESS]) - memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), - nla_len(tb[IFLA_ADDRESS])); - if (tb[IFLA_BROADCAST]) - memcpy(dev->broadcast, nla_data(tb[IFLA_BROADCAST]), - nla_len(tb[IFLA_BROADCAST])); - if (tb[IFLA_TXQLEN]) - dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); - if (tb[IFLA_WEIGHT]) - dev->weight = nla_get_u32(tb[IFLA_WEIGHT]); - if (tb[IFLA_OPERSTATE]) - set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); - if (tb[IFLA_LINKMODE]) - dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); + dev = rtnl_create_link(ifname, ops, tb); - if (ops->newlink) + if (IS_ERR(dev)) + err = PTR_ERR(dev); + else if (ops->newlink) err = ops->newlink(dev, tb, data); else err = register_netdevice(dev); -err_free: - if (err < 0) + + if (err < 0 && !IS_ERR(dev)) free_netdev(dev); return err; } @@ -1335,3 +1350,5 @@ EXPORT_SYMBOL(rtnl_unlock); EXPORT_SYMBOL(rtnl_unicast); EXPORT_SYMBOL(rtnl_notify); EXPORT_SYMBOL(rtnl_set_sk_err); +EXPORT_SYMBOL(rtnl_create_link); +EXPORT_SYMBOL(ifla_policy); diff -puN net/dccp/ackvec.c~git-net net/dccp/ackvec.c --- a/net/dccp/ackvec.c~git-net +++ a/net/dccp/ackvec.c @@ -69,21 +69,20 @@ int dccp_insert_option_ackvec(struct soc struct dccp_sock *dp = dccp_sk(sk); struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; /* Figure out how many options do we need to represent the ackvec */ - const u16 nr_opts = (av->dccpav_vec_len + - DCCP_MAX_ACKVEC_OPT_LEN - 1) / - DCCP_MAX_ACKVEC_OPT_LEN; + const u16 nr_opts = DIV_ROUND_UP(av->dccpav_vec_len, + DCCP_MAX_ACKVEC_OPT_LEN); u16 len = av->dccpav_vec_len + 2 * nr_opts, i; - struct timeval now; u32 elapsed_time; const unsigned char *tail, *from; unsigned char *to; struct dccp_ackvec_record *avr; + suseconds_t delta; if 
(DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) return -1; - dccp_timestamp(sk, &now); - elapsed_time = timeval_delta(&now, &av->dccpav_time) / 10; + delta = ktime_us_delta(ktime_get_real(), av->dccpav_time); + elapsed_time = delta / 10; if (elapsed_time != 0 && dccp_insert_option_elapsed_time(sk, skb, elapsed_time)) @@ -159,8 +158,7 @@ struct dccp_ackvec *dccp_ackvec_alloc(co av->dccpav_buf_head = DCCP_MAX_ACKVEC_LEN - 1; av->dccpav_buf_ackno = UINT48_MAX + 1; av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0; - av->dccpav_time.tv_sec = 0; - av->dccpav_time.tv_usec = 0; + av->dccpav_time = ktime_set(0, 0); av->dccpav_vec_len = 0; INIT_LIST_HEAD(&av->dccpav_records); } @@ -321,7 +319,7 @@ int dccp_ackvec_add(struct dccp_ackvec * } av->dccpav_buf_ackno = ackno; - dccp_timestamp(sk, &av->dccpav_time); + av->dccpav_time = ktime_get_real(); out: return 0; diff -puN net/dccp/ackvec.h~git-net net/dccp/ackvec.h --- a/net/dccp/ackvec.h~git-net +++ a/net/dccp/ackvec.h @@ -12,8 +12,8 @@ */ #include +#include #include -#include #include /* Read about the ECN nonce to see why it is 253 */ @@ -52,7 +52,7 @@ struct dccp_ackvec { u64 dccpav_buf_ackno; struct list_head dccpav_records; - struct timeval dccpav_time; + ktime_t dccpav_time; u16 dccpav_buf_head; u16 dccpav_vec_len; u8 dccpav_buf_nonce; diff -puN net/dccp/ccids/ccid3.c~git-net net/dccp/ccids/ccid3.c --- a/net/dccp/ccids/ccid3.c~git-net +++ a/net/dccp/ccids/ccid3.c @@ -128,7 +128,7 @@ static inline void ccid3_update_send_int * throughout the code. Only X_calc is unscaled (in bytes/second). 
* */ -static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now) +static void ccid3_hc_tx_update_x(struct sock *sk) { struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); @@ -153,14 +153,18 @@ static void ccid3_hc_tx_update_x(struct (((__u64)hctx->ccid3hctx_s) << 6) / TFRC_T_MBI); - } else if (timeval_delta(now, &hctx->ccid3hctx_t_ld) - - (suseconds_t)hctx->ccid3hctx_rtt >= 0) { + } else { + const ktime_t now = ktime_get_real(); - hctx->ccid3hctx_x = - max(min(2 * hctx->ccid3hctx_x, min_rate), - scaled_div(((__u64)hctx->ccid3hctx_s) << 6, - hctx->ccid3hctx_rtt)); - hctx->ccid3hctx_t_ld = *now; + if ((ktime_us_delta(now, hctx->ccid3hctx_t_ld) - + (s64)hctx->ccid3hctx_rtt) >= 0) { + + hctx->ccid3hctx_x = + max(min(2 * hctx->ccid3hctx_x, min_rate), + scaled_div(((__u64)hctx->ccid3hctx_s) << 6, + hctx->ccid3hctx_rtt)); + hctx->ccid3hctx_t_ld = now; + } } if (hctx->ccid3hctx_x != old_x) { @@ -214,7 +218,6 @@ static void ccid3_hc_tx_no_feedback_time { struct sock *sk = (struct sock *)data; struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - struct timeval now; unsigned long t_nfb = USEC_PER_SEC / 5; bh_lock_sock(sk); @@ -265,15 +268,12 @@ static void ccid3_hc_tx_no_feedback_time max(hctx->ccid3hctx_x_recv / 2, (((__u64)hctx->ccid3hctx_s) << 6) / (2 * TFRC_T_MBI)); - - if (hctx->ccid3hctx_p == 0) - dccp_timestamp(sk, &now); } else { hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc; hctx->ccid3hctx_x_recv <<= 4; } /* Now recalculate X [RFC 3448, 4.3, step (4)] */ - ccid3_hc_tx_update_x(sk, &now); + ccid3_hc_tx_update_x(sk); /* * Schedule no feedback timer to expire in * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) @@ -341,7 +341,7 @@ static int ccid3_hc_tx_send_packet(struc ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt); hctx->ccid3hctx_rtt = dp->dccps_syn_rtt; hctx->ccid3hctx_x = rfc3390_initial_rate(sk); - hctx->ccid3hctx_t_ld = ktime_to_timeval(now); + hctx->ccid3hctx_t_ld = now; } else { /* Sender does not have RTT sample: X = MSS/second */ 
hctx->ccid3hctx_x = dp->dccps_mss_cache; @@ -388,7 +388,6 @@ static void ccid3_hc_tx_packet_sent(stru unsigned int len) { struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - struct timeval now; struct dccp_tx_hist_entry *packet; BUG_ON(hctx == NULL); @@ -402,8 +401,7 @@ static void ccid3_hc_tx_packet_sent(stru } dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, packet); - dccp_timestamp(sk, &now); - packet->dccphtx_tstamp = now; + packet->dccphtx_tstamp = ktime_get_real(); packet->dccphtx_seqno = dccp_sk(sk)->dccps_gss; packet->dccphtx_rtt = hctx->ccid3hctx_rtt; packet->dccphtx_sent = 1; @@ -414,7 +412,7 @@ static void ccid3_hc_tx_packet_recv(stru struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); struct ccid3_options_received *opt_recv; struct dccp_tx_hist_entry *packet; - struct timeval now; + ktime_t now; unsigned long t_nfb; u32 pinv, r_sample; @@ -452,13 +450,12 @@ static void ccid3_hc_tx_packet_recv(stru else /* can not exceed 100% */ hctx->ccid3hctx_p = 1000000 / pinv; - dccp_timestamp(sk, &now); - + now = ktime_get_real(); /* * Calculate new round trip sample as per [RFC 3448, 4.3] by * R_sample = (now - t_recvdata) - t_elapsed */ - r_sample = dccp_sample_rtt(sk, &now, &packet->dccphtx_tstamp); + r_sample = dccp_sample_rtt(sk, now, &packet->dccphtx_tstamp); /* * Update RTT estimate by @@ -496,7 +493,7 @@ static void ccid3_hc_tx_packet_recv(stru tfrc_calc_x(hctx->ccid3hctx_s, hctx->ccid3hctx_rtt, hctx->ccid3hctx_p); - ccid3_hc_tx_update_x(sk, &now); + ccid3_hc_tx_update_x(sk); ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, " "p=%u, X_calc=%u, X_recv=%u, X=%u\n", @@ -729,20 +726,20 @@ static void ccid3_hc_rx_send_feedback(st struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); struct dccp_sock *dp = dccp_sk(sk); struct dccp_rx_hist_entry *packet; - struct timeval now; + ktime_t now; suseconds_t delta; ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk); - dccp_timestamp(sk, &now); + now = ktime_get_real(); switch (hcrx->ccid3hcrx_state) { case 
TFRC_RSTATE_NO_DATA: hcrx->ccid3hcrx_x_recv = 0; break; case TFRC_RSTATE_DATA: - delta = timeval_delta(&now, - &hcrx->ccid3hcrx_tstamp_last_feedback); + delta = ktime_us_delta(now, + hcrx->ccid3hcrx_tstamp_last_feedback); DCCP_BUG_ON(delta < 0); hcrx->ccid3hcrx_x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); @@ -764,7 +761,7 @@ static void ccid3_hc_rx_send_feedback(st hcrx->ccid3hcrx_bytes_recv = 0; /* Elapsed time information [RFC 4340, 13.2] in units of 10 * usecs */ - delta = timeval_delta(&now, &packet->dccphrx_tstamp); + delta = ktime_us_delta(now, packet->dccphrx_tstamp); DCCP_BUG_ON(delta < 0); hcrx->ccid3hcrx_elapsed_time = delta / 10; @@ -839,7 +836,7 @@ static int ccid3_hc_rx_detect_loss(struc dccp_li_update_li(sk, &hcrx->ccid3hcrx_li_hist, &hcrx->ccid3hcrx_hist, - &hcrx->ccid3hcrx_tstamp_last_feedback, + hcrx->ccid3hcrx_tstamp_last_feedback, hcrx->ccid3hcrx_s, hcrx->ccid3hcrx_bytes_recv, hcrx->ccid3hcrx_x_recv, @@ -876,9 +873,9 @@ static void ccid3_hc_rx_packet_recv(stru struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); const struct dccp_options_received *opt_recv; struct dccp_rx_hist_entry *packet; - struct timeval now; u32 p_prev, r_sample, rtt_prev; int loss, payload_size; + ktime_t now; BUG_ON(hcrx == NULL); @@ -892,8 +889,8 @@ static void ccid3_hc_rx_packet_recv(stru if (opt_recv->dccpor_timestamp_echo == 0) break; rtt_prev = hcrx->ccid3hcrx_rtt; - dccp_timestamp(sk, &now); - r_sample = dccp_sample_rtt(sk, &now, NULL); + now = ktime_get_real(); + r_sample = dccp_sample_rtt(sk, now, NULL); if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) hcrx->ccid3hcrx_rtt = r_sample; @@ -912,7 +909,7 @@ static void ccid3_hc_rx_packet_recv(stru return; } - packet = dccp_rx_hist_entry_new(ccid3_rx_hist, sk, opt_recv->dccpor_ndp, + packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp, skb, GFP_ATOMIC); if (unlikely(packet == NULL)) { DCCP_WARN("%s(%p), Not enough mem to add rx packet " @@ -941,9 +938,9 @@ static void 
ccid3_hc_rx_packet_recv(stru if (loss) break; - dccp_timestamp(sk, &now); - if ((timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) - - (suseconds_t)hcrx->ccid3hcrx_rtt) >= 0) { + now = ktime_get_real(); + if ((ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_ack) - + (s64)hcrx->ccid3hcrx_rtt) >= 0) { hcrx->ccid3hcrx_tstamp_last_ack = now; ccid3_hc_rx_send_feedback(sk); } @@ -984,8 +981,8 @@ static int ccid3_hc_rx_init(struct ccid hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist); - dccp_timestamp(sk, &hcrx->ccid3hcrx_tstamp_last_ack); - hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack; + hcrx->ccid3hcrx_tstamp_last_feedback = + hcrx->ccid3hcrx_tstamp_last_ack = ktime_get_real(); hcrx->ccid3hcrx_s = 0; hcrx->ccid3hcrx_rtt = 0; return 0; diff -puN net/dccp/ccids/ccid3.h~git-net net/dccp/ccids/ccid3.h --- a/net/dccp/ccids/ccid3.h~git-net +++ a/net/dccp/ccids/ccid3.h @@ -38,7 +38,6 @@ #include #include -#include #include #include #include "../ccid.h" @@ -111,7 +110,7 @@ struct ccid3_hc_tx_sock { u8 ccid3hctx_idle; ktime_t ccid3hctx_t_last_win_count; struct timer_list ccid3hctx_no_feedback_timer; - struct timeval ccid3hctx_t_ld; + ktime_t ccid3hctx_t_ld; ktime_t ccid3hctx_t_nom; u32 ccid3hctx_delta; struct list_head ccid3hctx_hist; @@ -153,8 +152,8 @@ struct ccid3_hc_rx_sock { ccid3hcrx_ccval_last_counter:4; enum ccid3_hc_rx_states ccid3hcrx_state:8; u32 ccid3hcrx_bytes_recv; - struct timeval ccid3hcrx_tstamp_last_feedback; - struct timeval ccid3hcrx_tstamp_last_ack; + ktime_t ccid3hcrx_tstamp_last_feedback; + ktime_t ccid3hcrx_tstamp_last_ack; struct list_head ccid3hcrx_hist; struct list_head ccid3hcrx_li_hist; u16 ccid3hcrx_s; diff -puN net/dccp/ccids/lib/loss_interval.c~git-net net/dccp/ccids/lib/loss_interval.c --- a/net/dccp/ccids/lib/loss_interval.c~git-net +++ a/net/dccp/ccids/lib/loss_interval.c @@ -125,14 +125,14 @@ static int dccp_li_hist_interval_new(str * 
returns estimated loss interval in usecs */ static u32 dccp_li_calc_first_li(struct sock *sk, struct list_head *hist_list, - struct timeval *last_feedback, + ktime_t last_feedback, u16 s, u32 bytes_recv, u32 previous_x_recv) { struct dccp_rx_hist_entry *entry, *next, *tail = NULL; u32 x_recv, p; suseconds_t rtt, delta; - struct timeval tstamp = { 0, 0 }; + ktime_t tstamp = ktime_set(0, 0); int interval = 0; int win_count = 0; int step = 0; @@ -176,7 +176,7 @@ found: return ~0; } - delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp); + delta = ktime_us_delta(tstamp, tail->dccphrx_tstamp); DCCP_BUG_ON(delta < 0); rtt = delta * 4 / interval; @@ -196,8 +196,7 @@ found: return ~0; } - dccp_timestamp(sk, &tstamp); - delta = timeval_delta(&tstamp, last_feedback); + delta = ktime_us_delta(ktime_get_real(), last_feedback); DCCP_BUG_ON(delta <= 0); x_recv = scaled_div32(bytes_recv, delta); @@ -226,7 +225,7 @@ found: void dccp_li_update_li(struct sock *sk, struct list_head *li_hist_list, struct list_head *hist_list, - struct timeval *last_feedback, u16 s, u32 bytes_recv, + ktime_t last_feedback, u16 s, u32 bytes_recv, u32 previous_x_recv, u64 seq_loss, u8 win_loss) { struct dccp_li_hist_entry *head; diff -puN net/dccp/ccids/lib/loss_interval.h~git-net net/dccp/ccids/lib/loss_interval.h --- a/net/dccp/ccids/lib/loss_interval.h~git-net +++ a/net/dccp/ccids/lib/loss_interval.h @@ -13,8 +13,8 @@ * any later version. 
*/ +#include #include -#include extern void dccp_li_hist_purge(struct list_head *list); @@ -23,7 +23,7 @@ extern u32 dccp_li_hist_calc_i_mean(stru extern void dccp_li_update_li(struct sock *sk, struct list_head *li_hist_list, struct list_head *hist_list, - struct timeval *last_feedback, u16 s, + ktime_t last_feedback, u16 s, u32 bytes_recv, u32 previous_x_recv, u64 seq_loss, u8 win_loss); #endif /* _DCCP_LI_HIST_ */ diff -puN net/dccp/ccids/lib/packet_history.h~git-net net/dccp/ccids/lib/packet_history.h --- a/net/dccp/ccids/lib/packet_history.h~git-net +++ a/net/dccp/ccids/lib/packet_history.h @@ -37,9 +37,9 @@ #ifndef _DCCP_PKT_HIST_ #define _DCCP_PKT_HIST_ +#include #include #include -#include #include "../../dccp.h" @@ -57,7 +57,7 @@ struct dccp_tx_hist_entry { u64 dccphtx_seqno:48, dccphtx_sent:1; u32 dccphtx_rtt; - struct timeval dccphtx_tstamp; + ktime_t dccphtx_tstamp; }; struct dccp_tx_hist { @@ -124,7 +124,7 @@ struct dccp_rx_hist_entry { dccphrx_ccval:4, dccphrx_type:4; u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */ - struct timeval dccphrx_tstamp; + ktime_t dccphrx_tstamp; }; struct dccp_rx_hist { @@ -136,7 +136,6 @@ extern void dccp_rx_hist_delete(struct static inline struct dccp_rx_hist_entry * dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, - const struct sock *sk, const u32 ndp, const struct sk_buff *skb, const gfp_t prio) @@ -151,7 +150,7 @@ static inline struct dccp_rx_hist_entry entry->dccphrx_ccval = dh->dccph_ccval; entry->dccphrx_type = dh->dccph_type; entry->dccphrx_ndp = ndp; - dccp_timestamp(sk, &entry->dccphrx_tstamp); + entry->dccphrx_tstamp = ktime_get_real(); } return entry; diff -puN net/dccp/dccp.h~git-net net/dccp/dccp.h --- a/net/dccp/dccp.h~git-net +++ a/net/dccp/dccp.h @@ -13,6 +13,7 @@ */ #include +#include #include #include #include @@ -296,8 +297,8 @@ extern int dccp_v4_connect(struct soc extern int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code); extern void dccp_send_close(struct sock *sk, const int 
active); extern int dccp_invalid_packet(struct sk_buff *skb); -extern u32 dccp_sample_rtt(struct sock *sk, struct timeval *t_recv, - struct timeval *t_history); +extern u32 dccp_sample_rtt(struct sock *sk, ktime_t t_recv, + ktime_t *t_history); static inline int dccp_bad_service_code(const struct sock *sk, const __be32 service) @@ -401,47 +402,6 @@ extern int dccp_insert_option(struct soc unsigned char option, const void *value, unsigned char len); -extern void dccp_timestamp(const struct sock *sk, struct timeval *tv); - -static inline suseconds_t timeval_usecs(const struct timeval *tv) -{ - return tv->tv_sec * USEC_PER_SEC + tv->tv_usec; -} - -static inline suseconds_t timeval_delta(const struct timeval *large, - const struct timeval *small) -{ - time_t secs = large->tv_sec - small->tv_sec; - suseconds_t usecs = large->tv_usec - small->tv_usec; - - if (usecs < 0) { - secs--; - usecs += USEC_PER_SEC; - } - return secs * USEC_PER_SEC + usecs; -} - -static inline void timeval_add_usecs(struct timeval *tv, - const suseconds_t usecs) -{ - tv->tv_usec += usecs; - while (tv->tv_usec >= USEC_PER_SEC) { - tv->tv_sec++; - tv->tv_usec -= USEC_PER_SEC; - } -} - -static inline void timeval_sub_usecs(struct timeval *tv, - const suseconds_t usecs) -{ - tv->tv_usec -= usecs; - while (tv->tv_usec < 0) { - tv->tv_sec--; - tv->tv_usec += USEC_PER_SEC; - } - DCCP_BUG_ON(tv->tv_sec < 0); -} - #ifdef CONFIG_SYSCTL extern int dccp_sysctl_init(void); extern void dccp_sysctl_exit(void); diff -puN net/dccp/input.c~git-net net/dccp/input.c --- a/net/dccp/input.c~git-net +++ a/net/dccp/input.c @@ -301,12 +301,10 @@ static int dccp_rcv_request_sent_state_p goto out_invalid_packet; /* Obtain RTT sample from SYN exchange (used by CCID 3) */ - if (dp->dccps_options_received.dccpor_timestamp_echo) { - struct timeval now; - - dccp_timestamp(sk, &now); - dp->dccps_syn_rtt = dccp_sample_rtt(sk, &now, NULL); - } + if (dp->dccps_options_received.dccpor_timestamp_echo) + dp->dccps_syn_rtt = 
dccp_sample_rtt(sk, + ktime_get_real(), + NULL); if (dccp_msk(sk)->dccpms_send_ack_vector && dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, @@ -593,22 +591,21 @@ EXPORT_SYMBOL_GPL(dccp_rcv_state_process * @t_recv: receive timestamp of packet with timestamp echo * @t_hist: packet history timestamp or NULL */ -u32 dccp_sample_rtt(struct sock *sk, struct timeval *t_recv, - struct timeval *t_hist) +u32 dccp_sample_rtt(struct sock *sk, ktime_t t_recv, ktime_t *t_hist) { struct dccp_sock *dp = dccp_sk(sk); struct dccp_options_received *or = &dp->dccps_options_received; - suseconds_t delta; + s64 delta; if (t_hist == NULL) { if (!or->dccpor_timestamp_echo) { DCCP_WARN("packet without timestamp echo\n"); return DCCP_SANE_RTT_MAX; } - timeval_sub_usecs(t_recv, or->dccpor_timestamp_echo * 10); - delta = timeval_usecs(t_recv); + ktime_sub_us(t_recv, or->dccpor_timestamp_echo * 10); + delta = ktime_to_us(t_recv); } else - delta = timeval_delta(t_recv, t_hist); + delta = ktime_us_delta(t_recv, *t_hist); delta -= or->dccpor_elapsed_time * 10; /* either set or 0 */ @@ -616,7 +613,7 @@ u32 dccp_sample_rtt(struct sock *sk, str DCCP_WARN("unusable RTT sample %ld, using min\n", (long)delta); return DCCP_SANE_RTT_MIN; } - if (unlikely(delta - (suseconds_t)DCCP_SANE_RTT_MAX > 0)) { + if (unlikely(delta - (s64)DCCP_SANE_RTT_MAX > 0)) { DCCP_WARN("RTT sample %ld too large, using max\n", (long)delta); return DCCP_SANE_RTT_MAX; } diff -puN net/dccp/minisocks.c~git-net net/dccp/minisocks.c --- a/net/dccp/minisocks.c~git-net +++ a/net/dccp/minisocks.c @@ -112,7 +112,6 @@ struct sock *dccp_create_openreq_child(s newdp->dccps_service_list = NULL; newdp->dccps_service = dreq->dreq_service; newicsk->icsk_rto = DCCP_TIMEOUT_INIT; - do_gettimeofday(&newdp->dccps_epoch); if (dccp_feat_clone(sk, newsk)) goto out_free; diff -puN net/dccp/options.c~git-net net/dccp/options.c --- a/net/dccp/options.c~git-net +++ a/net/dccp/options.c @@ -158,7 +158,7 @@ int dccp_parse_options(struct sock *sk, 
opt_recv->dccpor_timestamp = ntohl(*(__be32 *)value); dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; - dccp_timestamp(sk, &dp->dccps_timestamp_time); + dp->dccps_timestamp_time = ktime_get_real(); dccp_pr_debug("%s rx opt: TIMESTAMP=%u, ackno=%llu\n", dccp_role(sk), opt_recv->dccpor_timestamp, @@ -370,29 +370,9 @@ int dccp_insert_option_elapsed_time(stru EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); -void dccp_timestamp(const struct sock *sk, struct timeval *tv) -{ - const struct dccp_sock *dp = dccp_sk(sk); - - do_gettimeofday(tv); - tv->tv_sec -= dp->dccps_epoch.tv_sec; - tv->tv_usec -= dp->dccps_epoch.tv_usec; - - while (tv->tv_usec < 0) { - tv->tv_sec--; - tv->tv_usec += USEC_PER_SEC; - } -} - -EXPORT_SYMBOL_GPL(dccp_timestamp); - int dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) { - struct timeval tv; - __be32 now; - - dccp_timestamp(sk, &tv); - now = htonl(timeval_usecs(&tv) / 10); + __be32 now = htonl(((suseconds_t)ktime_to_us(ktime_get_real())) / 10); /* yes this will overflow but that is the point as we want a * 10 usec 32 bit timer which mean it wraps every 11.9 hours */ @@ -405,14 +385,12 @@ static int dccp_insert_option_timestamp_ struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); - struct timeval now; __be32 tstamp_echo; - u32 elapsed_time; int len, elapsed_time_len; unsigned char *to; - - dccp_timestamp(sk, &now); - elapsed_time = timeval_delta(&now, &dp->dccps_timestamp_time) / 10; + const suseconds_t delta = ktime_us_delta(ktime_get_real(), + dp->dccps_timestamp_time); + u32 elapsed_time = delta / 10; elapsed_time_len = dccp_elapsed_time_len(elapsed_time); len = 6 + elapsed_time_len; @@ -438,8 +416,7 @@ static int dccp_insert_option_timestamp_ } dp->dccps_timestamp_echo = 0; - dp->dccps_timestamp_time.tv_sec = 0; - dp->dccps_timestamp_time.tv_usec = 0; + dp->dccps_timestamp_time = ktime_set(0, 0); return 0; } diff -puN net/dccp/proto.c~git-net net/dccp/proto.c --- a/net/dccp/proto.c~git-net +++ 
a/net/dccp/proto.c @@ -172,7 +172,6 @@ int dccp_init_sock(struct sock *sk, cons struct inet_connection_sock *icsk = inet_csk(sk); dccp_minisock_init(&dp->dccps_minisock); - do_gettimeofday(&dp->dccps_epoch); /* * FIXME: We're hardcoding the CCID, and doing this at this point makes diff -puN net/ethernet/eth.c~git-net net/ethernet/eth.c --- a/net/ethernet/eth.c~git-net +++ a/net/ethernet/eth.c @@ -91,10 +91,10 @@ int eth_header(struct sk_buff *skb, stru if (!saddr) saddr = dev->dev_addr; - memcpy(eth->h_source, saddr, dev->addr_len); + memcpy(eth->h_source, saddr, ETH_ALEN); if (daddr) { - memcpy(eth->h_dest, daddr, dev->addr_len); + memcpy(eth->h_dest, daddr, ETH_ALEN); return ETH_HLEN; } @@ -103,7 +103,7 @@ int eth_header(struct sk_buff *skb, stru */ if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) { - memset(eth->h_dest, 0, dev->addr_len); + memset(eth->h_dest, 0, ETH_ALEN); return ETH_HLEN; } @@ -135,7 +135,7 @@ int eth_rebuild_header(struct sk_buff *s "%s: unable to resolve type %X addresses.\n", dev->name, (int)eth->h_proto); - memcpy(eth->h_source, dev->dev_addr, dev->addr_len); + memcpy(eth->h_source, dev->dev_addr, ETH_ALEN); break; } @@ -233,8 +233,8 @@ int eth_header_cache(struct neighbour *n return -1; eth->h_proto = type; - memcpy(eth->h_source, dev->dev_addr, dev->addr_len); - memcpy(eth->h_dest, neigh->ha, dev->addr_len); + memcpy(eth->h_source, dev->dev_addr, ETH_ALEN); + memcpy(eth->h_dest, neigh->ha, ETH_ALEN); hh->hh_len = ETH_HLEN; return 0; } @@ -251,7 +251,7 @@ void eth_header_cache_update(struct hh_c unsigned char *haddr) { memcpy(((u8 *) hh->hh_data) + HH_DATA_OFF(sizeof(struct ethhdr)), - haddr, dev->addr_len); + haddr, ETH_ALEN); } /** @@ -271,7 +271,7 @@ static int eth_mac_addr(struct net_devic return -EBUSY; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); return 0; } diff -puN net/ieee80211/ieee80211_crypt_ccmp.c~git-net 
net/ieee80211/ieee80211_crypt_ccmp.c --- a/net/ieee80211/ieee80211_crypt_ccmp.c~git-net +++ a/net/ieee80211/ieee80211_crypt_ccmp.c @@ -9,6 +9,7 @@ * more details. */ +#include #include #include #include @@ -241,7 +242,7 @@ static int ieee80211_ccmp_encrypt(struct hdr = (struct ieee80211_hdr_4addr *)skb->data; ccmp_init_blocks(key->tfm, hdr, key->tx_pn, data_len, b0, b, s0); - blocks = (data_len + AES_BLOCK_LEN - 1) / AES_BLOCK_LEN; + blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); last = data_len % AES_BLOCK_LEN; for (i = 1; i <= blocks; i++) { @@ -351,7 +352,7 @@ static int ieee80211_ccmp_decrypt(struct ccmp_init_blocks(key->tfm, hdr, pn, data_len, b0, a, b); xor_block(mic, b, CCMP_MIC_LEN); - blocks = (data_len + AES_BLOCK_LEN - 1) / AES_BLOCK_LEN; + blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); last = data_len % AES_BLOCK_LEN; for (i = 1; i <= blocks; i++) { diff -puN net/ipv4/Kconfig~git-net net/ipv4/Kconfig --- a/net/ipv4/Kconfig~git-net +++ a/net/ipv4/Kconfig @@ -394,6 +394,14 @@ config INET_XFRM_MODE_BEET If unsure, say Y. +config INET_LRO + tristate "Large Receive Offload (ipv4/tcp)" + + ---help--- + Support for Large Receive Offload (ipv4/tcp). + + If unsure, say Y. 
+ config INET_DIAG tristate "INET: socket monitoring interface" default y diff -puN net/ipv4/Makefile~git-net net/ipv4/Makefile --- a/net/ipv4/Makefile~git-net +++ a/net/ipv4/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_INET_ESP) += esp4.o obj-$(CONFIG_INET_IPCOMP) += ipcomp.o obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o +obj-$(CONFIG_INET_LRO) += inet_lro.o obj-$(CONFIG_INET_TUNNEL) += tunnel4.o obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o diff -puN net/ipv4/fib_trie.c~git-net net/ipv4/fib_trie.c --- a/net/ipv4/fib_trie.c~git-net +++ a/net/ipv4/fib_trie.c @@ -85,23 +85,14 @@ #define MAX_STAT_DEPTH 32 #define KEYLENGTH (8*sizeof(t_key)) -#define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l)) -#define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset)) typedef unsigned int t_key; #define T_TNODE 0 #define T_LEAF 1 #define NODE_TYPE_MASK 0x1UL -#define NODE_PARENT(node) \ - ((struct tnode *)rcu_dereference(((node)->parent & ~NODE_TYPE_MASK))) - #define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK) -#define NODE_SET_PARENT(node, ptr) \ - rcu_assign_pointer((node)->parent, \ - ((unsigned long)(ptr)) | NODE_TYPE(node)) - #define IS_TNODE(n) (!(n->parent & T_LEAF)) #define IS_LEAF(n) (n->parent & T_LEAF) @@ -174,6 +165,19 @@ static void tnode_free(struct tnode *tn) static struct kmem_cache *fn_alias_kmem __read_mostly; static struct trie *trie_local = NULL, *trie_main = NULL; +static inline struct tnode *node_parent(struct node *node) +{ + struct tnode *ret; + + ret = (struct tnode *)(node->parent & ~NODE_TYPE_MASK); + return rcu_dereference(ret); +} + +static inline void node_set_parent(struct node *node, struct tnode *ptr) +{ + rcu_assign_pointer(node->parent, + (unsigned long)ptr | NODE_TYPE(node)); +} /* rcu_read_lock needs to be hold by caller from readside */ @@ -189,6 
+193,11 @@ static inline int tnode_child_length(con return 1 << tn->bits; } +static inline t_key mask_pfx(t_key k, unsigned short l) +{ + return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l); +} + static inline t_key tkey_extract_bits(t_key a, int offset, int bits) { if (offset < KEYLENGTH) @@ -446,7 +455,7 @@ static void tnode_put_child_reorg(struct tn->full_children++; if (n) - NODE_SET_PARENT(n, tn); + node_set_parent(n, tn); rcu_assign_pointer(tn->child[i], n); } @@ -481,7 +490,7 @@ static struct node *resize(struct trie * continue; /* compress one level */ - NODE_SET_PARENT(n, NULL); + node_set_parent(n, NULL); tnode_free(tn); return n; } @@ -636,7 +645,7 @@ static struct node *resize(struct trie * /* compress one level */ - NODE_SET_PARENT(n, NULL); + node_set_parent(n, NULL); tnode_free(tn); return n; } @@ -673,7 +682,7 @@ static struct tnode *inflate(struct trie inode->pos == oldtnode->pos + oldtnode->bits && inode->bits > 1) { struct tnode *left, *right; - t_key m = TKEY_GET_MASK(inode->pos, 1); + t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos; left = tnode_new(inode->key&(~m), inode->pos + 1, inode->bits - 1); @@ -961,24 +970,21 @@ fib_find_node(struct trie *t, u32 key) static struct node *trie_rebalance(struct trie *t, struct tnode *tn) { int wasfull; - t_key cindex, key; - struct tnode *tp = NULL; + t_key cindex, key = tn->key; + struct tnode *tp; - key = tn->key; - - while (tn != NULL && NODE_PARENT(tn) != NULL) { - - tp = NODE_PARENT(tn); + while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); tn = (struct tnode *) resize (t, (struct tnode *)tn); tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull); - if (!NODE_PARENT(tn)) + tp = node_parent((struct node *) tn); + if (!tp) break; - - tn = NODE_PARENT(tn); + tn = tp; } + /* Handle last (top) tnode */ if (IS_TNODE(tn)) tn = (struct tnode*) resize(t, 
(struct tnode *)tn); @@ -1031,7 +1037,7 @@ fib_insert_node(struct trie *t, int *err pos = tn->pos + tn->bits; n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits)); - BUG_ON(n && NODE_PARENT(n) != tn); + BUG_ON(n && node_parent(n) != tn); } else break; } @@ -1083,7 +1089,7 @@ fib_insert_node(struct trie *t, int *err if (t->trie && n == NULL) { /* Case 2: n is NULL, and will just insert a new leaf */ - NODE_SET_PARENT(l, tp); + node_set_parent((struct node *)l, tp); cindex = tkey_extract_bits(key, tp->pos, tp->bits); put_child(t, (struct tnode *)tp, cindex, (struct node *)l); @@ -1114,7 +1120,7 @@ fib_insert_node(struct trie *t, int *err goto err; } - NODE_SET_PARENT(tn, tp); + node_set_parent((struct node *)tn, tp); missbit = tkey_extract_bits(key, newpos, 1); put_child(t, tn, missbit, (struct node *)l); @@ -1364,7 +1370,8 @@ fn_trie_lookup(struct fib_table *tb, con bits = pn->bits; if (!chopped_off) - cindex = tkey_extract_bits(MASK_PFX(key, current_prefix_length), pos, bits); + cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length), + pos, bits); n = tnode_get_child(pn, cindex); @@ -1450,8 +1457,8 @@ fn_trie_lookup(struct fib_table *tb, con * to find a matching prefix. 
*/ - node_prefix = MASK_PFX(cn->key, cn->pos); - key_prefix = MASK_PFX(key, cn->pos); + node_prefix = mask_pfx(cn->key, cn->pos); + key_prefix = mask_pfx(key, cn->pos); pref_mismatch = key_prefix^node_prefix; mp = 0; @@ -1495,12 +1502,13 @@ backtrace: if (chopped_off <= pn->bits) { cindex &= ~(1 << (chopped_off-1)); } else { - if (NODE_PARENT(pn) == NULL) + struct tnode *parent = node_parent((struct node *) pn); + if (!parent) goto failed; /* Get Child's index */ - cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits); - pn = NODE_PARENT(pn); + cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits); + pn = parent; chopped_off = 0; #ifdef CONFIG_IP_FIB_TRIE_STATS @@ -1536,7 +1544,7 @@ static int trie_leaf_remove(struct trie check_tnode(tn); n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits)); - BUG_ON(n && NODE_PARENT(n) != tn); + BUG_ON(n && node_parent(n) != tn); } l = (struct leaf *) n; @@ -1551,7 +1559,7 @@ static int trie_leaf_remove(struct trie t->revision++; t->size--; - tp = NODE_PARENT(n); + tp = node_parent(n); tnode_free((struct tnode *) n); if (tp) { @@ -1703,7 +1711,7 @@ static struct leaf *nextleaf(struct trie p = (struct tnode*) trie; /* Start */ } else - p = (struct tnode *) NODE_PARENT(c); + p = node_parent(c); while (p) { int pos, last; @@ -1740,7 +1748,7 @@ static struct leaf *nextleaf(struct trie up: /* No more children go up one step */ c = (struct node *) p; - p = (struct tnode *) NODE_PARENT(p); + p = node_parent(c); } return NULL; /* Ready. 
Root of trie */ } @@ -2043,7 +2051,7 @@ rescan: } /* Current node exhausted, pop back up */ - p = NODE_PARENT(tn); + p = node_parent((struct node *)tn); if (p) { cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1; tn = p; @@ -2317,7 +2325,7 @@ static int fib_trie_seq_show(struct seq_ if (v == SEQ_START_TOKEN) return 0; - if (!NODE_PARENT(n)) { + if (!node_parent(n)) { if (iter->trie == trie_local) seq_puts(seq, ":\n"); else @@ -2326,7 +2334,7 @@ static int fib_trie_seq_show(struct seq_ if (IS_TNODE(n)) { struct tnode *tn = (struct tnode *) n; - __be32 prf = htonl(MASK_PFX(tn->key, tn->pos)); + __be32 prf = htonl(mask_pfx(tn->key, tn->pos)); seq_indent(seq, iter->depth-1); seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n", diff -puN net/ipv4/inet_diag.c~git-net net/ipv4/inet_diag.c --- a/net/ipv4/inet_diag.c~git-net +++ a/net/ipv4/inet_diag.c @@ -11,6 +11,7 @@ * 2 of the License, or (at your option) any later version. */ +#include #include #include #include @@ -112,7 +113,7 @@ static int inet_csk_diag_fill(struct soc } #endif -#define EXPIRES_IN_MS(tmo) ((tmo - jiffies) * 1000 + HZ - 1) / HZ +#define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ) if (icsk->icsk_pending == ICSK_TIME_RETRANS) { r->idiag_timer = 1; @@ -190,7 +191,7 @@ static int inet_twsk_diag_fill(struct in r->id.idiag_dst[0] = tw->tw_daddr; r->idiag_state = tw->tw_substate; r->idiag_timer = 3; - r->idiag_expires = (tmo * 1000 + HZ - 1) / HZ; + r->idiag_expires = DIV_ROUND_UP(tmo * 1000, HZ); r->idiag_rqueue = 0; r->idiag_wqueue = 0; r->idiag_uid = 0; diff -puN /dev/null net/ipv4/inet_lro.c --- /dev/null +++ a/net/ipv4/inet_lro.c @@ -0,0 +1,600 @@ +/* + * linux/net/ipv4/inet_lro.c + * + * Large Receive Offload (ipv4 / tcp) + * + * (C) Copyright IBM Corp. 
2007 + * + * Authors: + * Jan-Bernd Themann + * Christoph Raisch + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + + +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jan-Bernd Themann "); +MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)"); + +#define TCP_HDR_LEN(tcph) (tcph->doff << 2) +#define IP_HDR_LEN(iph) (iph->ihl << 2) +#define TCP_PAYLOAD_LENGTH(iph, tcph) \ + (ntohs(iph->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph)) + +#define IPH_LEN_WO_OPTIONS 5 +#define TCPH_LEN_WO_OPTIONS 5 +#define TCPH_LEN_W_TIMESTAMP 8 + +#define LRO_MAX_PG_HLEN 64 + +#define LRO_INC_STATS(lro_mgr, attr) { lro_mgr->stats.attr++; } + +/* + * Basic tcp checks whether packet is suitable for LRO + */ + +static int lro_tcp_ip_check(struct iphdr *iph, struct tcphdr *tcph, + int len, struct net_lro_desc *lro_desc) +{ + /* check ip header: don't aggregate padded frames */ + if (ntohs(iph->tot_len) != len) + return -1; + + if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0) + return -1; + + if (iph->ihl != IPH_LEN_WO_OPTIONS) + return -1; + + if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack + || tcph->rst || tcph->syn || tcph->fin) + return -1; + + if (INET_ECN_is_ce(ipv4_get_dsfield(iph))) + return -1; + + if (tcph->doff != TCPH_LEN_WO_OPTIONS + && tcph->doff != TCPH_LEN_W_TIMESTAMP) + return -1; + + /* check tcp 
options (only timestamp allowed) */ + if (tcph->doff == TCPH_LEN_W_TIMESTAMP) { + u32 *topt = (u32 *)(tcph + 1); + + if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) + | (TCPOPT_TIMESTAMP << 8) + | TCPOLEN_TIMESTAMP)) + return -1; + + /* timestamp should be in right order */ + topt++; + if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval), + ntohl(*topt))) + return -1; + + /* timestamp reply should not be zero */ + topt++; + if (*topt == 0) + return -1; + } + + return 0; +} + +static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc) +{ + struct iphdr *iph = lro_desc->iph; + struct tcphdr *tcph = lro_desc->tcph; + u32 *p; + __wsum tcp_hdr_csum; + + tcph->ack_seq = lro_desc->tcp_ack; + tcph->window = lro_desc->tcp_window; + + if (lro_desc->tcp_saw_tstamp) { + p = (u32 *)(tcph + 1); + *(p+2) = lro_desc->tcp_rcv_tsecr; + } + + iph->tot_len = htons(lro_desc->ip_tot_len); + + iph->check = 0; + iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl); + + tcph->check = 0; + tcp_hdr_csum = csum_partial((u8 *)tcph, TCP_HDR_LEN(tcph), 0); + lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum); + tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, + lro_desc->ip_tot_len - + IP_HDR_LEN(iph), IPPROTO_TCP, + lro_desc->data_csum); +} + +static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len) +{ + __wsum tcp_csum; + __wsum tcp_hdr_csum; + __wsum tcp_ps_hdr_csum; + + tcp_csum = ~csum_unfold(tcph->check); + tcp_hdr_csum = csum_partial((u8 *)tcph, TCP_HDR_LEN(tcph), tcp_csum); + + tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, + len + TCP_HDR_LEN(tcph), + IPPROTO_TCP, 0); + + return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum), + tcp_ps_hdr_csum); +} + +static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb, + struct iphdr *iph, struct tcphdr *tcph, + u16 vlan_tag, struct vlan_group *vgrp) +{ + int nr_frags; + u32 *ptr; + u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph); + + nr_frags = 
skb_shinfo(skb)->nr_frags; + lro_desc->parent = skb; + lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]); + lro_desc->iph = iph; + lro_desc->tcph = tcph; + lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len; + lro_desc->tcp_ack = ntohl(tcph->ack_seq); + lro_desc->tcp_window = tcph->window; + + lro_desc->pkt_aggr_cnt = 1; + lro_desc->ip_tot_len = ntohs(iph->tot_len); + + if (tcph->doff == 8) { + ptr = (u32 *)(tcph+1); + lro_desc->tcp_saw_tstamp = 1; + lro_desc->tcp_rcv_tsval = *(ptr+1); + lro_desc->tcp_rcv_tsecr = *(ptr+2); + } + + lro_desc->mss = tcp_data_len; + lro_desc->vgrp = vgrp; + lro_desc->vlan_tag = vlan_tag; + lro_desc->active = 1; + + lro_desc->data_csum = lro_tcp_data_csum(iph, tcph, + tcp_data_len); +} + +static inline void lro_clear_desc(struct net_lro_desc *lro_desc) +{ + memset(lro_desc, 0, sizeof(struct net_lro_desc)); +} + +static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph, + struct tcphdr *tcph, int tcp_data_len) +{ + struct sk_buff *parent = lro_desc->parent; + u32 *topt; + + lro_desc->pkt_aggr_cnt++; + lro_desc->ip_tot_len += tcp_data_len; + lro_desc->tcp_next_seq += tcp_data_len; + lro_desc->tcp_window = tcph->window; + lro_desc->tcp_ack = tcph->ack_seq; + + /* don't update tcp_rcv_tsval, would not work with PAWS */ + if (lro_desc->tcp_saw_tstamp) { + topt = (u32 *) (tcph + 1); + lro_desc->tcp_rcv_tsecr = *(topt + 2); + } + + lro_desc->data_csum = csum_block_add(lro_desc->data_csum, + lro_tcp_data_csum(iph, tcph, + tcp_data_len), + parent->len); + + parent->len += tcp_data_len; + parent->data_len += tcp_data_len; + if (tcp_data_len > lro_desc->mss) + lro_desc->mss = tcp_data_len; +} + +static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb, + struct iphdr *iph, struct tcphdr *tcph) +{ + struct sk_buff *parent = lro_desc->parent; + int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph); + + lro_add_common(lro_desc, iph, tcph, tcp_data_len); + + skb_pull(skb, (skb->len - tcp_data_len)); + 
parent->truesize += skb->truesize; + + if (lro_desc->last_skb) + lro_desc->last_skb->next = skb; + else + skb_shinfo(parent)->frag_list = skb; + + lro_desc->last_skb = skb; +} + +static void lro_add_frags(struct net_lro_desc *lro_desc, + int len, int hlen, int truesize, + struct skb_frag_struct *skb_frags, + struct iphdr *iph, struct tcphdr *tcph) +{ + struct sk_buff *skb = lro_desc->parent; + int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph); + + lro_add_common(lro_desc, iph, tcph, tcp_data_len); + + skb->truesize += truesize; + + skb_frags[0].page_offset += hlen; + skb_frags[0].size -= hlen; + + while (tcp_data_len > 0) { + *(lro_desc->next_frag) = *skb_frags; + tcp_data_len -= skb_frags->size; + lro_desc->next_frag++; + skb_frags++; + skb_shinfo(skb)->nr_frags++; + } +} + +static int lro_check_tcp_conn(struct net_lro_desc *lro_desc, + struct iphdr *iph, + struct tcphdr *tcph) +{ + if ((lro_desc->iph->saddr != iph->saddr) + || (lro_desc->iph->daddr != iph->daddr) + || (lro_desc->tcph->source != tcph->source) + || (lro_desc->tcph->dest != tcph->dest)) + return -1; + return 0; +} + +static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr, + struct net_lro_desc *lro_arr, + struct iphdr *iph, + struct tcphdr *tcph) +{ + struct net_lro_desc *lro_desc = NULL; + struct net_lro_desc *tmp; + int max_desc = lro_mgr->max_desc; + int i; + + for (i = 0; i < max_desc; i++) { + tmp = &lro_arr[i]; + if (tmp->active) + if (!lro_check_tcp_conn(tmp, iph, tcph)) { + lro_desc = tmp; + goto out; + } + } + + for (i = 0; i < max_desc; i++) { + if (!lro_arr[i].active) { + lro_desc = &lro_arr[i]; + goto out; + } + } + + LRO_INC_STATS(lro_mgr, no_desc); +out: + return lro_desc; +} + +static void lro_flush(struct net_lro_mgr *lro_mgr, + struct net_lro_desc *lro_desc) +{ + if (lro_desc->pkt_aggr_cnt > 1) + lro_update_tcp_ip_header(lro_desc); + + skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss; + + if (lro_desc->vgrp) { + if (test_bit(LRO_F_NAPI, &lro_mgr->features)) + 
vlan_hwaccel_receive_skb(lro_desc->parent, + lro_desc->vgrp, + lro_desc->vlan_tag); + else + vlan_hwaccel_rx(lro_desc->parent, + lro_desc->vgrp, + lro_desc->vlan_tag); + + } else { + if (test_bit(LRO_F_NAPI, &lro_mgr->features)) + netif_receive_skb(lro_desc->parent); + else + netif_rx(lro_desc->parent); + } + + LRO_INC_STATS(lro_mgr, flushed); + lro_clear_desc(lro_desc); +} + +static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, + struct vlan_group *vgrp, u16 vlan_tag, void *priv) +{ + struct net_lro_desc *lro_desc; + struct iphdr *iph; + struct tcphdr *tcph; + u64 flags; + int vlan_hdr_len = 0; + + if (!lro_mgr->get_skb_header + || lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph, + &flags, priv)) + goto out; + + if (!(flags & LRO_IPV4) || !(flags & LRO_TCP)) + goto out; + + lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph); + if (!lro_desc) + goto out; + + if ((skb->protocol == htons(ETH_P_8021Q)) + && !test_bit(LRO_F_EXTRACT_VLAN_ID, &lro_mgr->features)) + vlan_hdr_len = VLAN_HLEN; + + if (!lro_desc->active) { /* start new lro session */ + if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL)) + goto out; + + skb->ip_summed = lro_mgr->ip_summed_aggr; + lro_init_desc(lro_desc, skb, iph, tcph, vlan_tag, vgrp); + LRO_INC_STATS(lro_mgr, aggregated); + return 0; + } + + if (lro_desc->tcp_next_seq != ntohl(tcph->seq)) + goto out2; + + if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc)) + goto out2; + + lro_add_packet(lro_desc, skb, iph, tcph); + LRO_INC_STATS(lro_mgr, aggregated); + + if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) || + lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu)) + lro_flush(lro_mgr, lro_desc); + + return 0; + +out2: /* send aggregated SKBs to stack */ + lro_flush(lro_mgr, lro_desc); + +out: /* Original SKB has to be posted to stack */ + skb->ip_summed = lro_mgr->ip_summed; + return 1; +} + + +static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr, + struct skb_frag_struct 
*frags, + int len, int true_size, + void *mac_hdr, + int hlen, __wsum sum, + u32 ip_summed) +{ + struct sk_buff *skb; + struct skb_frag_struct *skb_frags; + int data_len = len; + int hdr_len = min(len, hlen); + + skb = netdev_alloc_skb(lro_mgr->dev, hlen); + if (!skb) + return NULL; + + skb->len = len; + skb->data_len = len - hdr_len; + skb->truesize += true_size; + skb->tail += hdr_len; + + memcpy(skb->data, mac_hdr, hdr_len); + + skb_frags = skb_shinfo(skb)->frags; + while (data_len > 0) { + *skb_frags = *frags; + data_len -= frags->size; + skb_frags++; + frags++; + skb_shinfo(skb)->nr_frags++; + } + + skb_shinfo(skb)->frags[0].page_offset += hdr_len; + skb_shinfo(skb)->frags[0].size -= hdr_len; + + skb->ip_summed = ip_summed; + skb->csum = sum; + skb->protocol = eth_type_trans(skb, lro_mgr->dev); + return skb; +} + +static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr, + struct skb_frag_struct *frags, + int len, int true_size, + struct vlan_group *vgrp, + u16 vlan_tag, void *priv, __wsum sum) +{ + struct net_lro_desc *lro_desc; + struct iphdr *iph; + struct tcphdr *tcph; + struct sk_buff *skb; + u64 flags; + void *mac_hdr; + int mac_hdr_len; + int hdr_len = LRO_MAX_PG_HLEN; + int vlan_hdr_len = 0; + + if (!lro_mgr->get_frag_header + || lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph, + (void *)&tcph, &flags, priv)) { + mac_hdr = page_address(frags->page) + frags->page_offset; + goto out1; + } + + if (!(flags & LRO_IPV4) || !(flags & LRO_TCP)) + goto out1; + + hdr_len = (int)((void *)(tcph) + TCP_HDR_LEN(tcph) - mac_hdr); + mac_hdr_len = (int)((void *)(iph) - mac_hdr); + + lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph); + if (!lro_desc) + goto out1; + + if (!lro_desc->active) { /* start new lro session */ + if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, NULL)) + goto out1; + + skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr, + hdr_len, 0, lro_mgr->ip_summed_aggr); + if (!skb) + goto out; + + if 
((skb->protocol == htons(ETH_P_8021Q)) + && !test_bit(LRO_F_EXTRACT_VLAN_ID, &lro_mgr->features)) + vlan_hdr_len = VLAN_HLEN; + + iph = (void *)(skb->data + vlan_hdr_len); + tcph = (void *)((u8 *)skb->data + vlan_hdr_len + + IP_HDR_LEN(iph)); + + lro_init_desc(lro_desc, skb, iph, tcph, 0, NULL); + LRO_INC_STATS(lro_mgr, aggregated); + return 0; + } + + if (lro_desc->tcp_next_seq != ntohl(tcph->seq)) + goto out2; + + if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, lro_desc)) + goto out2; + + lro_add_frags(lro_desc, len, hdr_len, true_size, frags, iph, tcph); + LRO_INC_STATS(lro_mgr, aggregated); + + if ((skb_shinfo(lro_desc->parent)->nr_frags >= lro_mgr->max_aggr) || + lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu)) + lro_flush(lro_mgr, lro_desc); + + return NULL; + +out2: /* send aggregated packets to the stack */ + lro_flush(lro_mgr, lro_desc); + +out1: /* Original packet has to be posted to the stack */ + skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr, + hdr_len, sum, lro_mgr->ip_summed); +out: + return skb; +} + +void lro_receive_skb(struct net_lro_mgr *lro_mgr, + struct sk_buff *skb, + void *priv) +{ + if (__lro_proc_skb(lro_mgr, skb, NULL, 0, priv)) { + if (test_bit(LRO_F_NAPI, &lro_mgr->features)) + netif_receive_skb(skb); + else + netif_rx(skb); + } +} +EXPORT_SYMBOL(lro_receive_skb); + +void lro_vlan_hwaccel_receive_skb(struct net_lro_mgr *lro_mgr, + struct sk_buff *skb, + struct vlan_group *vgrp, + u16 vlan_tag, + void *priv) +{ + if (__lro_proc_skb(lro_mgr, skb, vgrp, vlan_tag, priv)) { + if (test_bit(LRO_F_NAPI, &lro_mgr->features)) + vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag); + else + vlan_hwaccel_rx(skb, vgrp, vlan_tag); + } +} +EXPORT_SYMBOL(lro_vlan_hwaccel_receive_skb); + +void lro_receive_frags(struct net_lro_mgr *lro_mgr, + struct skb_frag_struct *frags, + int len, int true_size, void *priv, __wsum sum) +{ + struct sk_buff *skb; + + skb = __lro_proc_segment(lro_mgr, frags, len, true_size, NULL, 0, + priv, sum); + if (!skb) 
+ return; + + if (test_bit(LRO_F_NAPI, &lro_mgr->features)) + netif_receive_skb(skb); + else + netif_rx(skb); +} +EXPORT_SYMBOL(lro_receive_frags); + +void lro_vlan_hwaccel_receive_frags(struct net_lro_mgr *lro_mgr, + struct skb_frag_struct *frags, + int len, int true_size, + struct vlan_group *vgrp, + u16 vlan_tag, void *priv, __wsum sum) +{ + struct sk_buff *skb; + + skb = __lro_proc_segment(lro_mgr, frags, len, true_size, vgrp, + vlan_tag, priv, sum); + if (!skb) + return; + + if (test_bit(LRO_F_NAPI, &lro_mgr->features)) + vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag); + else + vlan_hwaccel_rx(skb, vgrp, vlan_tag); +} +EXPORT_SYMBOL(lro_vlan_hwaccel_receive_frags); + +void lro_flush_all(struct net_lro_mgr *lro_mgr) +{ + int i; + struct net_lro_desc *lro_desc = lro_mgr->lro_arr; + + for (i = 0; i < lro_mgr->max_desc; i++) { + if (lro_desc[i].active) + lro_flush(lro_mgr, &lro_desc[i]); + } +} +EXPORT_SYMBOL(lro_flush_all); + +void lro_flush_pkt(struct net_lro_mgr *lro_mgr, + struct iphdr *iph, struct tcphdr *tcph) +{ + struct net_lro_desc *lro_desc; + + lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph); + if (lro_desc->active) + lro_flush(lro_mgr, lro_desc); +} +EXPORT_SYMBOL(lro_flush_pkt); diff -puN net/ipv4/inet_timewait_sock.c~git-net net/ipv4/inet_timewait_sock.c --- a/net/ipv4/inet_timewait_sock.c~git-net +++ a/net/ipv4/inet_timewait_sock.c @@ -8,7 +8,7 @@ * From code orinally in TCP */ - +#include #include #include #include @@ -292,7 +292,7 @@ void inet_twsk_schedule(struct inet_time if (timeo >= timewait_len) { slot = INET_TWDR_TWKILL_SLOTS - 1; } else { - slot = (timeo + twdr->period - 1) / twdr->period; + slot = DIV_ROUND_UP(timeo, twdr->period); if (slot >= INET_TWDR_TWKILL_SLOTS) slot = INET_TWDR_TWKILL_SLOTS - 1; } diff -puN net/ipv4/ip_forward.c~git-net net/ipv4/ip_forward.c --- a/net/ipv4/ip_forward.c~git-net +++ a/net/ipv4/ip_forward.c @@ -105,7 +105,7 @@ int ip_forward(struct sk_buff *skb) * We now generate an ICMP HOST REDIRECT 
giving the route * we calculated. */ - if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr) + if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb->sp) ip_rt_send_redirect(skb); skb->priority = rt_tos2priority(iph->tos); diff -puN net/ipv4/proc.c~git-net net/ipv4/proc.c --- a/net/ipv4/proc.c~git-net +++ a/net/ipv4/proc.c @@ -244,6 +244,9 @@ static const struct snmp_mib snmp4_net_l SNMP_MIB_ITEM("TCPAbortOnLinger", LINUX_MIB_TCPABORTONLINGER), SNMP_MIB_ITEM("TCPAbortFailed", LINUX_MIB_TCPABORTFAILED), SNMP_MIB_ITEM("TCPMemoryPressures", LINUX_MIB_TCPMEMORYPRESSURES), + SNMP_MIB_ITEM("TCPSACKDiscard", LINUX_MIB_TCPSACKDISCARD), + SNMP_MIB_ITEM("TCPDSACKIgnoredOld", LINUX_MIB_TCPDSACKIGNOREDOLD), + SNMP_MIB_ITEM("TCPDSACKIgnoredNoUndo", LINUX_MIB_TCPDSACKIGNOREDNOUNDO), SNMP_MIB_SENTINEL }; diff -puN net/ipv4/tcp.c~git-net net/ipv4/tcp.c --- a/net/ipv4/tcp.c~git-net +++ a/net/ipv4/tcp.c @@ -247,6 +247,7 @@ * TCP_CLOSE socket is finished */ +#include #include #include #include @@ -2014,7 +2015,7 @@ void tcp_get_info(struct sock *sk, struc if (tp->rx_opt.tstamp_ok) info->tcpi_options |= TCPI_OPT_TIMESTAMPS; - if (tp->rx_opt.sack_ok) + if (tcp_is_sack(tp)) info->tcpi_options |= TCPI_OPT_SACK; if (tp->rx_opt.wscale_ok) { info->tcpi_options |= TCPI_OPT_WSCALE; @@ -2210,7 +2211,7 @@ struct sk_buff *tcp_tso_segment(struct s goto out; mss = skb_shinfo(skb)->gso_size; - skb_shinfo(skb)->gso_segs = (skb->len + mss - 1) / mss; + skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); segs = NULL; goto out; diff -puN net/ipv4/tcp_bic.c~git-net net/ipv4/tcp_bic.c --- a/net/ipv4/tcp_bic.c~git-net +++ a/net/ipv4/tcp_bic.c @@ -210,7 +210,7 @@ static void bictcp_acked(struct sock *sk { const struct inet_connection_sock *icsk = inet_csk(sk); - if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) { + if (icsk->icsk_ca_state == TCP_CA_Open) { struct bictcp *ca = inet_csk_ca(sk); cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; ca->delayed_ack += cnt; diff -puN net/ipv4/tcp_cubic.c~git-net 
net/ipv4/tcp_cubic.c --- a/net/ipv4/tcp_cubic.c~git-net +++ a/net/ipv4/tcp_cubic.c @@ -314,7 +314,7 @@ static void bictcp_acked(struct sock *sk struct bictcp *ca = inet_csk_ca(sk); u32 delay; - if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) { + if (icsk->icsk_ca_state == TCP_CA_Open) { cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; ca->delayed_ack += cnt; } diff -puN net/ipv4/tcp_input.c~git-net net/ipv4/tcp_input.c --- a/net/ipv4/tcp_input.c~git-net +++ a/net/ipv4/tcp_input.c @@ -111,13 +111,10 @@ int sysctl_tcp_abc __read_mostly; #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) #define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED) -#define IsReno(tp) ((tp)->rx_opt.sack_ok == 0) -#define IsFack(tp) ((tp)->rx_opt.sack_ok & 2) -#define IsDSack(tp) ((tp)->rx_opt.sack_ok & 4) - #define IsSackFrto() (sysctl_tcp_frto == 0x2) #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) +#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) /* Adapt the MSS value used to make delayed ack decision to the * real world. @@ -198,6 +195,55 @@ static inline int tcp_in_quickack_mode(c return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong; } +static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp) +{ + if (tp->ecn_flags&TCP_ECN_OK) + tp->ecn_flags |= TCP_ECN_QUEUE_CWR; +} + +static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb) +{ + if (tcp_hdr(skb)->cwr) + tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; +} + +static inline void TCP_ECN_withdraw_cwr(struct tcp_sock *tp) +{ + tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; +} + +static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb) +{ + if (tp->ecn_flags&TCP_ECN_OK) { + if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags)) + tp->ecn_flags |= TCP_ECN_DEMAND_CWR; + /* Funny extension: if ECT is not set on a segment, + * it is surely retransmit. It is not in ECN RFC, + * but Linux follows this rule. 
*/ + else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) + tcp_enter_quickack_mode((struct sock *)tp); + } +} + +static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th) +{ + if ((tp->ecn_flags&TCP_ECN_OK) && (!th->ece || th->cwr)) + tp->ecn_flags &= ~TCP_ECN_OK; +} + +static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th) +{ + if ((tp->ecn_flags&TCP_ECN_OK) && (!th->ece || !th->cwr)) + tp->ecn_flags &= ~TCP_ECN_OK; +} + +static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th) +{ + if (th->ece && !th->syn && (tp->ecn_flags&TCP_ECN_OK)) + return 1; + return 0; +} + /* Buffer size and advertised window tuning. * * 1. Tuning sk->sk_sndbuf, when connection enters established state. @@ -810,6 +856,21 @@ void tcp_enter_cwr(struct sock *sk, cons } } +/* + * Packet counting of FACK is based on in-order assumptions, therefore TCP + * disables it when reordering is detected + */ +static void tcp_disable_fack(struct tcp_sock *tp) +{ + tp->rx_opt.sack_ok &= ~2; +} + +/* Take a notice that peer is sending DSACKs */ +static void tcp_dsack_seen(struct tcp_sock *tp) +{ + tp->rx_opt.sack_ok |= 4; +} + /* Initialize metrics on socket. */ static void tcp_init_metrics(struct sock *sk) @@ -831,7 +892,7 @@ static void tcp_init_metrics(struct sock } if (dst_metric(dst, RTAX_REORDERING) && tp->reordering != dst_metric(dst, RTAX_REORDERING)) { - tp->rx_opt.sack_ok &= ~2; + tcp_disable_fack(tp); tp->reordering = dst_metric(dst, RTAX_REORDERING); } @@ -893,9 +954,9 @@ static void tcp_update_reordering(struct /* This exciting event is worth to be remembered. 
8) */ if (ts) NET_INC_STATS_BH(LINUX_MIB_TCPTSREORDER); - else if (IsReno(tp)) + else if (tcp_is_reno(tp)) NET_INC_STATS_BH(LINUX_MIB_TCPRENOREORDER); - else if (IsFack(tp)) + else if (tcp_is_fack(tp)) NET_INC_STATS_BH(LINUX_MIB_TCPFACKREORDER); else NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER); @@ -907,8 +968,7 @@ static void tcp_update_reordering(struct tp->sacked_out, tp->undo_marker ? tp->undo_retrans : 0); #endif - /* Disable FACK yet. */ - tp->rx_opt.sack_ok &= ~2; + tcp_disable_fack(tp); } } @@ -959,7 +1019,119 @@ static void tcp_update_reordering(struct * for retransmitted and already SACKed segment -> reordering.. * Both of these heuristics are not used in Loss state, when we cannot * account for retransmits accurately. + * + * SACK block validation. + * ---------------------- + * + * SACK block range validation checks that the received SACK block fits to + * the expected sequence limits, i.e., it is between SND.UNA and SND.NXT. + * Note that SND.UNA is not included to the range though being valid because + * it means that the receiver is rather inconsistent with itself (reports + * SACK reneging when it should advance SND.UNA). + * + * Implements also blockage to start_seq wrap-around. Problem lies in the + * fact that though start_seq (s) is before end_seq (i.e., not reversed), + * there's no guarantee that it will be before snd_nxt (n). The problem + * happens when start_seq resides between end_seq wrap (e_w) and snd_nxt + * wrap (s_w): + * + * <- outs wnd -> <- wrapzone -> + * u e n u_w e_w s n_w + * | | | | | | | + * |<------------+------+----- TCP seqno space --------------+---------->| + * ...-- <2^31 ->| |<--------... + * ...---- >2^31 ------>| |<--------... + * + * Current code wouldn't be vulnerable but it's better still to discard such + * crazy SACK blocks. 
Doing this check for start_seq alone closes somewhat + * similar case (end_seq after snd_nxt wrap) as earlier reversed check in + * snd_nxt wrap -> snd_una region will then become "well defined", i.e., + * equal to the ideal case (infinite seqno space without wrap caused issues). + * + * With D-SACK the lower bound is extended to cover sequence space below + * SND.UNA down to undo_marker, which is the last point of interest. Yet + * again, DSACK block must not to go across snd_una (for the same reason as + * for the normal SACK blocks, explained above). But there all simplicity + * ends, TCP might receive valid D-SACKs below that. As long as they reside + * fully below undo_marker they do not affect behavior in anyway and can + * therefore be safely ignored. In rare cases (which are more or less + * theoretical ones), the D-SACK will nicely cross that boundary due to skb + * fragmentation and packet reordering past skb's retransmission. To consider + * them correctly, the acceptable range must be extended even more though + * the exact amount is rather hard to quantify. However, tp->max_window can + * be used as an exaggerated estimate. */ +static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack, + u32 start_seq, u32 end_seq) +{ + /* Too far in future, or reversed (interpretation is ambiguous) */ + if (after(end_seq, tp->snd_nxt) || !before(start_seq, end_seq)) + return 0; + + /* Nasty start_seq wrap-around check (see comments above) */ + if (!before(start_seq, tp->snd_nxt)) + return 0; + + /* In outstanding window? ...This is valid exit for DSACKs too. 
+ * start_seq == snd_una is non-sensical (see comments above) + */ + if (after(start_seq, tp->snd_una)) + return 1; + + if (!is_dsack || !tp->undo_marker) + return 0; + + /* ...Then it's D-SACK, and must reside below snd_una completely */ + if (!after(end_seq, tp->snd_una)) + return 0; + + if (!before(start_seq, tp->undo_marker)) + return 1; + + /* Too old */ + if (!after(end_seq, tp->undo_marker)) + return 0; + + /* Undo_marker boundary crossing (overestimates a lot). Known already: + * start_seq < undo_marker and end_seq >= undo_marker. + */ + return !before(start_seq, end_seq - tp->max_window); +} + + +static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb, + struct tcp_sack_block_wire *sp, int num_sacks, + u32 prior_snd_una) +{ + u32 start_seq_0 = ntohl(get_unaligned(&sp[0].start_seq)); + u32 end_seq_0 = ntohl(get_unaligned(&sp[0].end_seq)); + int dup_sack = 0; + + if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) { + dup_sack = 1; + tcp_dsack_seen(tp); + NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV); + } else if (num_sacks > 1) { + u32 end_seq_1 = ntohl(get_unaligned(&sp[1].end_seq)); + u32 start_seq_1 = ntohl(get_unaligned(&sp[1].start_seq)); + + if (!after(end_seq_0, end_seq_1) && + !before(start_seq_0, start_seq_1)) { + dup_sack = 1; + tcp_dsack_seen(tp); + NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV); + } + } + + /* D-SACK for already forgotten data... Do dumb counting. */ + if (dup_sack && + !after(end_seq_0, prior_snd_una) && + after(end_seq_0, tp->undo_marker)) + tp->undo_retrans--; + + return dup_sack; +} + static int tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una) { @@ -979,31 +1151,16 @@ tcp_sacktag_write_queue(struct sock *sk, int i; int first_sack_index; - if (!tp->sacked_out) + if (!tp->sacked_out) { tp->fackets_out = 0; + tp->highest_sack = tp->snd_una; + } prior_fackets = tp->fackets_out; - /* Check for D-SACK. 
*/ - if (before(ntohl(sp[0].start_seq), TCP_SKB_CB(ack_skb)->ack_seq)) { - flag |= FLAG_DSACKING_ACK; - found_dup_sack = 1; - tp->rx_opt.sack_ok |= 4; - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV); - } else if (num_sacks > 1 && - !after(ntohl(sp[0].end_seq), ntohl(sp[1].end_seq)) && - !before(ntohl(sp[0].start_seq), ntohl(sp[1].start_seq))) { + found_dup_sack = tcp_check_dsack(tp, ack_skb, sp, + num_sacks, prior_snd_una); + if (found_dup_sack) flag |= FLAG_DSACKING_ACK; - found_dup_sack = 1; - tp->rx_opt.sack_ok |= 4; - NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV); - } - - /* D-SACK for already forgotten data... - * Do dumb counting. */ - if (found_dup_sack && - !after(ntohl(sp[0].end_seq), prior_snd_una) && - after(ntohl(sp[0].end_seq), tp->undo_marker)) - tp->undo_retrans--; /* Eliminate too old ACKs, but take into * account more or less fresh ones, they can @@ -1083,6 +1240,17 @@ tcp_sacktag_write_queue(struct sock *sk, int fack_count; int dup_sack = (found_dup_sack && (i == first_sack_index)); + if (!tcp_is_sackblock_valid(tp, dup_sack, start_seq, end_seq)) { + if (dup_sack) { + if (!tp->undo_marker) + NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDNOUNDO); + else + NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDOLD); + } else + NET_INC_STATS_BH(LINUX_MIB_TCPSACKDISCARD); + continue; + } + skb = cached_skb; fack_count = cached_fack_count; @@ -1217,6 +1385,10 @@ tcp_sacktag_write_queue(struct sock *sk, if (fack_count > tp->fackets_out) tp->fackets_out = fack_count; + + if (after(TCP_SKB_CB(skb)->seq, + tp->highest_sack)) + tp->highest_sack = TCP_SKB_CB(skb)->seq; } else { if (dup_sack && (sacked&TCPCB_RETRANS)) reord = min(fack_count, reord); @@ -1254,7 +1426,7 @@ tcp_sacktag_write_queue(struct sock *sk, continue; if ((TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) && after(lost_retrans, TCP_SKB_CB(skb)->ack_seq) && - (IsFack(tp) || + (tcp_is_fack(tp) || !before(lost_retrans, TCP_SKB_CB(skb)->ack_seq + tp->reordering * tp->mss_cache))) { @@ -1274,7 +1446,7 @@ 
tcp_sacktag_write_queue(struct sock *sk, } } - tp->left_out = tp->sacked_out + tp->lost_out; + tcp_verify_left_out(tp); if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss && (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark))) @@ -1292,6 +1464,52 @@ tcp_sacktag_write_queue(struct sock *sk, /* F-RTO can only be used if TCP has never retransmitted anything other than * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) */ +static void tcp_check_reno_reordering(struct sock *sk, const int addend) +{ + struct tcp_sock *tp = tcp_sk(sk); + u32 holes; + + holes = max(tp->lost_out, 1U); + holes = min(holes, tp->packets_out); + + if ((tp->sacked_out + holes) > tp->packets_out) { + tp->sacked_out = tp->packets_out - holes; + tcp_update_reordering(sk, tp->packets_out + addend, 0); + } +} + +/* Emulate SACKs for SACKless connection: account for a new dupack. */ + +static void tcp_add_reno_sack(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + tp->sacked_out++; + tcp_check_reno_reordering(sk, 0); + tcp_verify_left_out(tp); +} + +/* Account for ACK, ACKing some data in Reno Recovery phase. */ + +static void tcp_remove_reno_sacks(struct sock *sk, int acked) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (acked > 0) { + /* One ACK acked hole. The rest eat duplicate ACKs. */ + if (acked-1 >= tp->sacked_out) + tp->sacked_out = 0; + else + tp->sacked_out -= acked-1; + } + tcp_check_reno_reordering(sk, acked); + tcp_verify_left_out(tp); +} + +static inline void tcp_reset_reno_sack(struct tcp_sock *tp) +{ + tp->sacked_out = 0; +} + int tcp_use_frto(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); @@ -1380,7 +1598,7 @@ void tcp_enter_frto(struct sock *sk) TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); } - tcp_sync_left_out(tp); + tcp_verify_left_out(tp); /* Earlier loss recovery underway (see RFC4138; Appendix B). 
* The last condition is necessary at least in tp->frto_counter case. @@ -1405,17 +1623,15 @@ static void tcp_enter_frto_loss(struct s { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int cnt = 0; - tp->sacked_out = 0; tp->lost_out = 0; - tp->fackets_out = 0; tp->retrans_out = 0; + if (tcp_is_reno(tp)) + tcp_reset_reno_sack(tp); tcp_for_write_queue(skb, sk) { if (skb == tcp_send_head(sk)) break; - cnt += tcp_skb_pcount(skb); /* * Count the retransmission made on RTO correctly (only when * waiting for the first ACK and did not get it)... @@ -1429,22 +1645,15 @@ static void tcp_enter_frto_loss(struct s } else { TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); } - if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) { - /* Do not mark those segments lost that were - * forward transmitted after RTO - */ - if (!after(TCP_SKB_CB(skb)->end_seq, - tp->frto_highmark)) { - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out += tcp_skb_pcount(skb); - } - } else { - tp->sacked_out += tcp_skb_pcount(skb); - tp->fackets_out = cnt; + /* Don't lost mark skbs that were fwd transmitted after RTO */ + if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) && + !after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) { + TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; + tp->lost_out += tcp_skb_pcount(skb); } } - tcp_sync_left_out(tp); + tcp_verify_left_out(tp); tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments; tp->snd_cwnd_cnt = 0; @@ -1463,7 +1672,6 @@ static void tcp_enter_frto_loss(struct s void tcp_clear_retrans(struct tcp_sock *tp) { - tp->left_out = 0; tp->retrans_out = 0; tp->fackets_out = 0; @@ -1520,7 +1728,7 @@ void tcp_enter_loss(struct sock *sk, int tp->fackets_out = cnt; } } - tcp_sync_left_out(tp); + tcp_verify_left_out(tp); tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering); @@ -1560,7 +1768,7 @@ static int tcp_check_sack_reneging(struc static inline int tcp_fackets_out(struct tcp_sock *tp) { - return IsReno(tp) ? 
tp->sacked_out+1 : tp->fackets_out; + return tcp_is_reno(tp) ? tp->sacked_out+1 : tp->fackets_out; } static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb) @@ -1708,55 +1916,18 @@ static int tcp_time_to_recover(struct so return 0; } -/* If we receive more dupacks than we expected counting segments - * in assumption of absent reordering, interpret this as reordering. - * The only another reason could be bug in receiver TCP. - */ -static void tcp_check_reno_reordering(struct sock *sk, const int addend) -{ - struct tcp_sock *tp = tcp_sk(sk); - u32 holes; - - holes = max(tp->lost_out, 1U); - holes = min(holes, tp->packets_out); - - if ((tp->sacked_out + holes) > tp->packets_out) { - tp->sacked_out = tp->packets_out - holes; - tcp_update_reordering(sk, tp->packets_out + addend, 0); - } -} - -/* Emulate SACKs for SACKless connection: account for a new dupack. */ - -static void tcp_add_reno_sack(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - tp->sacked_out++; - tcp_check_reno_reordering(sk, 0); - tcp_sync_left_out(tp); -} - -/* Account for ACK, ACKing some data in Reno Recovery phase. */ - -static void tcp_remove_reno_sacks(struct sock *sk, int acked) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (acked > 0) { - /* One ACK acked hole. The rest eat duplicate ACKs. */ - if (acked-1 >= tp->sacked_out) - tp->sacked_out = 0; - else - tp->sacked_out -= acked-1; - } - tcp_check_reno_reordering(sk, acked); - tcp_sync_left_out(tp); -} - -static inline void tcp_reset_reno_sack(struct tcp_sock *tp) -{ - tp->sacked_out = 0; - tp->left_out = tp->lost_out; +/* RFC: This is from the original, I doubt that this is necessary at all: + * clear xmit_retrans hint if seq of this skb is beyond hint. How could we + * retransmitted past LOST markings in the first place? I'm not fully sure + * about undo and end of connection cases, which can cause R without L? 
+ */ +static void tcp_verify_retransmit_hint(struct tcp_sock *tp, + struct sk_buff *skb) +{ + if ((tp->retransmit_skb_hint != NULL) && + before(TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) + tp->retransmit_skb_hint = NULL; } /* Mark head of queue up as lost. */ @@ -1789,17 +1960,10 @@ static void tcp_mark_head_lost(struct so if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); - - /* clear xmit_retransmit_queue hints - * if this is beyond hint */ - if (tp->retransmit_skb_hint != NULL && - before(TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) - tp->retransmit_skb_hint = NULL; - + tcp_verify_retransmit_hint(tp, skb); } } - tcp_sync_left_out(tp); + tcp_verify_left_out(tp); } /* Account newly detected lost packet(s) */ @@ -1808,7 +1972,7 @@ static void tcp_update_scoreboard(struct { struct tcp_sock *tp = tcp_sk(sk); - if (IsFack(tp)) { + if (tcp_is_fack(tp)) { int lost = tp->fackets_out - tp->reordering; if (lost <= 0) lost = 1; @@ -1822,7 +1986,7 @@ static void tcp_update_scoreboard(struct * Hence, we can detect timed out packets during fast * retransmit without falling to slow start. */ - if (!IsReno(tp) && tcp_head_timedout(sk)) { + if (!tcp_is_reno(tp) && tcp_head_timedout(sk)) { struct sk_buff *skb; skb = tp->scoreboard_skb_hint ? 
tp->scoreboard_skb_hint @@ -1837,19 +2001,13 @@ static void tcp_update_scoreboard(struct if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); - - /* clear xmit_retrans hint */ - if (tp->retransmit_skb_hint && - before(TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) - - tp->retransmit_skb_hint = NULL; + tcp_verify_retransmit_hint(tp, skb); } } tp->scoreboard_skb_hint = skb; - tcp_sync_left_out(tp); + tcp_verify_left_out(tp); } } @@ -1880,7 +2038,7 @@ static void tcp_cwnd_down(struct sock *s int decr = tp->snd_cwnd_cnt + 1; if ((flag&(FLAG_ANY_PROGRESS|FLAG_DSACKING_ACK)) || - (IsReno(tp) && !(flag&FLAG_NOT_DUP))) { + (tcp_is_reno(tp) && !(flag&FLAG_NOT_DUP))) { tp->snd_cwnd_cnt = decr&1; decr >>= 1; @@ -1913,7 +2071,7 @@ static void DBGUNDO(struct sock *sk, con printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n", msg, NIPQUAD(inet->daddr), ntohs(inet->dport), - tp->snd_cwnd, tp->left_out, + tp->snd_cwnd, tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); } @@ -1971,7 +2129,7 @@ static int tcp_try_undo_recovery(struct NET_INC_STATS_BH(LINUX_MIB_TCPFULLUNDO); tp->undo_marker = 0; } - if (tp->snd_una == tp->high_seq && IsReno(tp)) { + if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { /* Hold old state until something *above* high_seq * is ACKed. For Reno it is MUST to prevent false * fast retransmits (RFC2582). SACK TCP is safe. */ @@ -2001,7 +2159,7 @@ static int tcp_try_undo_partial(struct s { struct tcp_sock *tp = tcp_sk(sk); /* Partial ACK arrived. Force Hoe's retransmit. */ - int failed = IsReno(tp) || tp->fackets_out>tp->reordering; + int failed = tcp_is_reno(tp) || tp->fackets_out>tp->reordering; if (tcp_may_undo(tp)) { /* Plain luck! 
Hole if filled with delayed @@ -2042,12 +2200,11 @@ static int tcp_try_undo_loss(struct sock DBGUNDO(sk, "partial loss"); tp->lost_out = 0; - tp->left_out = tp->sacked_out; tcp_undo_cwr(sk, 1); NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); inet_csk(sk)->icsk_retransmits = 0; tp->undo_marker = 0; - if (!IsReno(tp)) + if (tcp_is_sack(tp)) tcp_set_ca_state(sk, TCP_CA_Open); return 1; } @@ -2066,7 +2223,7 @@ static void tcp_try_to_open(struct sock { struct tcp_sock *tp = tcp_sk(sk); - tcp_sync_left_out(tp); + tcp_verify_left_out(tp); if (tp->retrans_out == 0) tp->retrans_stamp = 0; @@ -2077,7 +2234,7 @@ static void tcp_try_to_open(struct sock if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { int state = TCP_CA_Open; - if (tp->left_out || tp->retrans_out || tp->undo_marker) + if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker) state = TCP_CA_Disorder; if (inet_csk(sk)->icsk_ca_state != state) { @@ -2130,7 +2287,7 @@ static void tcp_mtup_probe_success(struc * tcp_xmit_retransmit_queue(). */ static void -tcp_fastretrans_alert(struct sock *sk, int prior_packets, int flag) +tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -2164,8 +2321,8 @@ tcp_fastretrans_alert(struct sock *sk, i NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); } - /* D. Synchronize left_out to current state. */ - tcp_sync_left_out(tp); + /* D. Check consistency of the current state. */ + tcp_verify_left_out(tp); /* E. Check state exit conditions. State can be terminated * when high_seq is ACKed. */ @@ -2194,14 +2351,14 @@ tcp_fastretrans_alert(struct sock *sk, i if (!tp->undo_marker || /* For SACK case do not Open to allow to undo * catching for all duplicate ACKs. 
*/ - IsReno(tp) || tp->snd_una != tp->high_seq) { + tcp_is_reno(tp) || tp->snd_una != tp->high_seq) { tp->undo_marker = 0; tcp_set_ca_state(sk, TCP_CA_Open); } break; case TCP_CA_Recovery: - if (IsReno(tp)) + if (tcp_is_reno(tp)) tcp_reset_reno_sack(tp); if (tcp_try_undo_recovery(sk)) return; @@ -2214,14 +2371,10 @@ tcp_fastretrans_alert(struct sock *sk, i switch (icsk->icsk_ca_state) { case TCP_CA_Recovery: if (!(flag & FLAG_SND_UNA_ADVANCED)) { - if (IsReno(tp) && is_dupack) + if (tcp_is_reno(tp) && is_dupack) tcp_add_reno_sack(sk); - } else { - int acked = prior_packets - tp->packets_out; - if (IsReno(tp)) - tcp_remove_reno_sacks(sk, acked); - do_lost = tcp_try_undo_partial(sk, acked); - } + } else + do_lost = tcp_try_undo_partial(sk, pkts_acked); break; case TCP_CA_Loss: if (flag&FLAG_DATA_ACKED) @@ -2235,7 +2388,7 @@ tcp_fastretrans_alert(struct sock *sk, i return; /* Loss is undone; fall through to processing in Open state. */ default: - if (IsReno(tp)) { + if (tcp_is_reno(tp)) { if (flag & FLAG_SND_UNA_ADVANCED) tcp_reset_reno_sack(tp); if (is_dupack) @@ -2263,7 +2416,7 @@ tcp_fastretrans_alert(struct sock *sk, i /* Otherwise enter Recovery state */ - if (IsReno(tp)) + if (tcp_is_reno(tp)) NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERY); else NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERY); @@ -2361,8 +2514,7 @@ static void tcp_cong_avoid(struct sock * /* Restart timer after forward progress on connection. * RFC2988 recommends to restart timer to now+rto. 
*/ - -static void tcp_ack_packets_out(struct sock *sk) +static void tcp_rearm_rto(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -2503,7 +2655,7 @@ static int tcp_clean_rtx_queue(struct so last_ackt = skb->tstamp; } tcp_dec_pcount_approx(&tp->fackets_out, skb); - tcp_packets_out_dec(tp, skb); + tp->packets_out -= tcp_skb_pcount(skb); tcp_unlink_write_queue(skb, sk); sk_stream_free_skb(sk, skb); clear_all_retrans_hints(tp); @@ -2515,7 +2667,10 @@ static int tcp_clean_rtx_queue(struct so = inet_csk(sk)->icsk_ca_ops; tcp_ack_update_rtt(sk, acked, seq_rtt); - tcp_ack_packets_out(sk); + tcp_rearm_rto(sk); + + if (tcp_is_reno(tp)) + tcp_remove_reno_sacks(sk, pkts_acked); if (ca_ops->pkts_acked) { s32 rtt_us = -1; @@ -2540,7 +2695,7 @@ static int tcp_clean_rtx_queue(struct so BUG_TRAP((int)tp->sacked_out >= 0); BUG_TRAP((int)tp->lost_out >= 0); BUG_TRAP((int)tp->retrans_out >= 0); - if (!tp->packets_out && tp->rx_opt.sack_ok) { + if (!tp->packets_out && tcp_is_sack(tp)) { const struct inet_connection_sock *icsk = inet_csk(sk); if (tp->lost_out) { printk(KERN_DEBUG "Leak l=%u %d\n", @@ -2709,7 +2864,7 @@ static int tcp_process_frto(struct sock { struct tcp_sock *tp = tcp_sk(sk); - tcp_sync_left_out(tp); + tcp_verify_left_out(tp); /* Duplicate the behavior from Loss state (fastretrans_alert) */ if (flag&FLAG_DATA_ACKED) @@ -2720,7 +2875,7 @@ static int tcp_process_frto(struct sock return 1; } - if (!IsSackFrto() || IsReno(tp)) { + if (!IsSackFrto() || tcp_is_reno(tp)) { /* RFC4138 shortcoming in step 2; should also have case c): * ACK isn't duplicate nor advances window, e.g., opposite dir * data, winupdate @@ -2867,7 +3022,7 @@ static int tcp_ack(struct sock *sk, stru if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight, 0); - tcp_fastretrans_alert(sk, prior_packets, flag); + tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, flag); } else { if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) 
tcp_cong_avoid(sk, ack, prior_in_flight, 1); @@ -3204,7 +3359,7 @@ static void tcp_fin(struct sk_buff *skb, * Probably, we should reset in this case. For now drop them. */ __skb_queue_purge(&tp->out_of_order_queue); - if (tp->rx_opt.sack_ok) + if (tcp_is_sack(tp)) tcp_sack_reset(&tp->rx_opt); sk_stream_mem_reclaim(sk); @@ -3234,7 +3389,7 @@ static inline int tcp_sack_extend(struct static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq) { - if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && sysctl_tcp_dsack) { if (before(seq, tp->rcv_nxt)) NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOLDSENT); else @@ -3264,7 +3419,7 @@ static void tcp_send_dupack(struct sock NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST); tcp_enter_quickack_mode(sk); - if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && sysctl_tcp_dsack) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) @@ -3580,7 +3735,7 @@ drop: if (!skb_peek(&tp->out_of_order_queue)) { /* Initial out of order segment, build 1 SACK. */ - if (tp->rx_opt.sack_ok) { + if (tcp_is_sack(tp)) { tp->rx_opt.num_sacks = 1; tp->rx_opt.dsack = 0; tp->rx_opt.eff_sacks = 1; @@ -3645,7 +3800,7 @@ drop: } add_sack: - if (tp->rx_opt.sack_ok) + if (tcp_is_sack(tp)) tcp_sack_new_ofo_skb(sk, seq, end_seq); } } @@ -3834,7 +3989,7 @@ static int tcp_prune_queue(struct sock * * is in a sad state like this, we care only about integrity * of the connection not performance. 
*/ - if (tp->rx_opt.sack_ok) + if (tcp_is_sack(tp)) tcp_sack_reset(&tp->rx_opt); sk_stream_mem_reclaim(sk); } @@ -4535,8 +4690,8 @@ static int tcp_rcv_synsent_state_process tp->tcp_header_len = sizeof(struct tcphdr); } - if (tp->rx_opt.sack_ok && sysctl_tcp_fack) - tp->rx_opt.sack_ok |= 2; + if (tcp_is_sack(tp) && sysctl_tcp_fack) + tcp_enable_fack(tp); tcp_mtup_init(sk); tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); diff -puN net/ipv4/tcp_minisocks.c~git-net net/ipv4/tcp_minisocks.c --- a/net/ipv4/tcp_minisocks.c~git-net +++ a/net/ipv4/tcp_minisocks.c @@ -368,6 +368,12 @@ void tcp_twsk_destructor(struct sock *sk EXPORT_SYMBOL_GPL(tcp_twsk_destructor); +static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, + struct request_sock *req) +{ + tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0; +} + /* This is not only more efficient than what we used to do, it eliminates * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM * @@ -399,7 +405,6 @@ struct sock *tcp_create_openreq_child(st newicsk->icsk_rto = TCP_TIMEOUT_INIT; newtp->packets_out = 0; - newtp->left_out = 0; newtp->retrans_out = 0; newtp->sacked_out = 0; newtp->fackets_out = 0; @@ -440,7 +445,7 @@ struct sock *tcp_create_openreq_child(st newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) { if (sysctl_tcp_fack) - newtp->rx_opt.sack_ok |= 2; + tcp_enable_fack(newtp); } newtp->window_clamp = req->window_clamp; newtp->rcv_ssthresh = req->rcv_wnd; diff -puN net/ipv4/tcp_output.c~git-net net/ipv4/tcp_output.c --- a/net/ipv4/tcp_output.c~git-net +++ a/net/ipv4/tcp_output.c @@ -61,6 +61,18 @@ int sysctl_tcp_base_mss __read_mostly = /* By default, RFC2861 behavior. 
*/ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; +static inline void tcp_packets_out_inc(struct sock *sk, + const struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + int orig = tp->packets_out; + + tp->packets_out += tcp_skb_pcount(skb); + if (!orig) + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, TCP_RTO_MAX); +} + static void update_send_head(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -269,6 +281,56 @@ static u16 tcp_select_window(struct sock return new_win; } +static inline void TCP_ECN_send_synack(struct tcp_sock *tp, + struct sk_buff *skb) +{ + TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR; + if (!(tp->ecn_flags&TCP_ECN_OK)) + TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE; +} + +static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + + tp->ecn_flags = 0; + if (sysctl_tcp_ecn) { + TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE|TCPCB_FLAG_CWR; + tp->ecn_flags = TCP_ECN_OK; + } +} + +static __inline__ void +TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th) +{ + if (inet_rsk(req)->ecn_ok) + th->ece = 1; +} + +static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, + int tcp_header_len) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (tp->ecn_flags & TCP_ECN_OK) { + /* Not-retransmitted data segment: set ECT and inject CWR. 
*/ + if (skb->len != tcp_header_len && + !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) { + INET_ECN_xmit(sk); + if (tp->ecn_flags&TCP_ECN_QUEUE_CWR) { + tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; + tcp_hdr(skb)->cwr = 1; + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; + } + } else { + /* ACK or retransmitted segment: clear ECT|CE */ + INET_ECN_dontxmit(sk); + } + if (tp->ecn_flags & TCP_ECN_DEMAND_CWR) + tcp_hdr(skb)->ece = 1; + } +} + static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp, __u32 tstamp, __u8 **md5_hash) { @@ -584,11 +646,7 @@ static void tcp_set_skb_tso_segs(struct skb_shinfo(skb)->gso_size = 0; skb_shinfo(skb)->gso_type = 0; } else { - unsigned int factor; - - factor = skb->len + (mss_now - 1); - factor /= mss_now; - skb_shinfo(skb)->gso_segs = factor; + skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now); skb_shinfo(skb)->gso_size = mss_now; skb_shinfo(skb)->gso_type = sk->sk_gso_type; } @@ -682,30 +740,22 @@ int tcp_fragment(struct sock *sk, struct if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) tp->retrans_out -= diff; - if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { + if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) tp->lost_out -= diff; - tp->left_out -= diff; - } if (diff > 0) { /* Adjust Reno SACK estimate. 
*/ - if (!tp->rx_opt.sack_ok) { - tp->sacked_out -= diff; - if ((int)tp->sacked_out < 0) - tp->sacked_out = 0; - tcp_sync_left_out(tp); + if (tcp_is_reno(tp)) { + tcp_dec_pcount_approx_int(&tp->sacked_out, diff); + tcp_verify_left_out(tp); } - tp->fackets_out -= diff; - if ((int)tp->fackets_out < 0) - tp->fackets_out = 0; + tcp_dec_pcount_approx_int(&tp->fackets_out, diff); /* SACK fastpath might overwrite it unless dealt with */ if (tp->fastpath_skb_hint != NULL && after(TCP_SKB_CB(tp->fastpath_skb_hint)->seq, TCP_SKB_CB(skb)->seq)) { - tp->fastpath_cnt_hint -= diff; - if ((int)tp->fastpath_cnt_hint < 0) - tp->fastpath_cnt_hint = 0; + tcp_dec_pcount_approx_int(&tp->fastpath_cnt_hint, diff); } } } @@ -1683,21 +1733,17 @@ static void tcp_retrans_try_collapse(str TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL); if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS) tp->retrans_out -= tcp_skb_pcount(next_skb); - if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) { + if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) tp->lost_out -= tcp_skb_pcount(next_skb); - tp->left_out -= tcp_skb_pcount(next_skb); - } /* Reno case is special. Sigh... */ - if (!tp->rx_opt.sack_ok && tp->sacked_out) { + if (tcp_is_reno(tp) && tp->sacked_out) tcp_dec_pcount_approx(&tp->sacked_out, next_skb); - tp->left_out -= tcp_skb_pcount(next_skb); - } /* Not quite right: it can be > snd.fack, but * it is better to underestimate fackets. */ tcp_dec_pcount_approx(&tp->fackets_out, next_skb); - tcp_packets_out_dec(tp, next_skb); + tp->packets_out -= tcp_skb_pcount(next_skb); sk_stream_free_skb(sk, next_skb); } } @@ -1736,7 +1782,7 @@ void tcp_simple_retransmit(struct sock * if (!lost) return; - tcp_sync_left_out(tp); + tcp_verify_left_out(tp); /* Don't muck with the congestion window here. * Reason is that we do not increase amount of _data_ @@ -1938,40 +1984,35 @@ void tcp_xmit_retransmit_queue(struct so return; /* No forward retransmissions in Reno are possible. 
*/ - if (!tp->rx_opt.sack_ok) + if (tcp_is_reno(tp)) return; /* Yeah, we have to make difficult choice between forward transmission * and retransmission... Both ways have their merits... * * For now we do not retransmit anything, while we have some new - * segments to send. + * segments to send. In the other cases, follow rule 3 for + * NextSeg() specified in RFC3517. */ if (tcp_may_send_now(sk)) return; - if (tp->forward_skb_hint) { + /* If nothing is SACKed, highest_sack in the loop won't be valid */ + if (!tp->sacked_out) + return; + + if (tp->forward_skb_hint) skb = tp->forward_skb_hint; - packet_cnt = tp->forward_cnt_hint; - } else{ + else skb = tcp_write_queue_head(sk); - packet_cnt = 0; - } tcp_for_write_queue_from(skb, sk) { if (skb == tcp_send_head(sk)) break; - tp->forward_cnt_hint = packet_cnt; tp->forward_skb_hint = skb; - /* Similar to the retransmit loop above we - * can pretend that the retransmitted SKB - * we send out here will be composed of one - * real MSS sized packet because tcp_retransmit_skb() - * will fragment it if necessary. 
- */ - if (++packet_cnt > tp->fackets_out) + if (after(TCP_SKB_CB(skb)->seq, tp->highest_sack)) break; if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) diff -puN net/ipv4/tcp_timer.c~git-net net/ipv4/tcp_timer.c --- a/net/ipv4/tcp_timer.c~git-net +++ a/net/ipv4/tcp_timer.c @@ -315,7 +315,7 @@ static void tcp_retransmit_timer(struct if (icsk->icsk_retransmits == 0) { if (icsk->icsk_ca_state == TCP_CA_Disorder || icsk->icsk_ca_state == TCP_CA_Recovery) { - if (tp->rx_opt.sack_ok) { + if (tcp_is_sack(tp)) { if (icsk->icsk_ca_state == TCP_CA_Recovery) NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL); else diff -puN net/ipv4/udp.c~git-net net/ipv4/udp.c --- a/net/ipv4/udp.c~git-net +++ a/net/ipv4/udp.c @@ -113,9 +113,8 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_sta struct hlist_head udp_hash[UDP_HTABLE_SIZE]; DEFINE_RWLOCK(udp_hash_lock); -static int udp_port_rover; - -static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[]) +static inline int __udp_lib_lport_inuse(__u16 num, + const struct hlist_head udptable[]) { struct sock *sk; struct hlist_node *node; @@ -132,11 +131,10 @@ static inline int __udp_lib_lport_inuse( * @sk: socket struct in question * @snum: port number to look up * @udptable: hash list table, must be of UDP_HTABLE_SIZE - * @port_rover: pointer to record of last unallocated port * @saddr_comp: AF-dependent comparison of bound local IP addresses */ int __udp_lib_get_port(struct sock *sk, unsigned short snum, - struct hlist_head udptable[], int *port_rover, + struct hlist_head udptable[], int (*saddr_comp)(const struct sock *sk1, const struct sock *sk2 ) ) { @@ -146,49 +144,56 @@ int __udp_lib_get_port(struct sock *sk, int error = 1; write_lock_bh(&udp_hash_lock); - if (snum == 0) { - int best_size_so_far, best, result, i; - if (*port_rover > sysctl_local_port_range[1] || - *port_rover < sysctl_local_port_range[0]) - *port_rover = sysctl_local_port_range[0]; - best_size_so_far = 32767; - best = result = *port_rover; - for (i = 0; i < 
UDP_HTABLE_SIZE; i++, result++) { - int size; - - head = &udptable[result & (UDP_HTABLE_SIZE - 1)]; - if (hlist_empty(head)) { - if (result > sysctl_local_port_range[1]) - result = sysctl_local_port_range[0] + - ((result - sysctl_local_port_range[0]) & - (UDP_HTABLE_SIZE - 1)); + if (!snum) { + int i; + int low = sysctl_local_port_range[0]; + int high = sysctl_local_port_range[1]; + unsigned rover, best, best_size_so_far; + + best_size_so_far = UINT_MAX; + best = rover = net_random() % (high - low) + low; + + /* 1st pass: look for empty (or shortest) hash chain */ + for (i = 0; i < UDP_HTABLE_SIZE; i++) { + int size = 0; + + head = &udptable[rover & (UDP_HTABLE_SIZE - 1)]; + if (hlist_empty(head)) goto gotit; - } - size = 0; + sk_for_each(sk2, node, head) { if (++size >= best_size_so_far) goto next; } best_size_so_far = size; - best = result; + best = rover; next: - ; + /* fold back if end of range */ + if (++rover > high) + rover = low + ((rover - low) + & (UDP_HTABLE_SIZE - 1)); + + } - result = best; - for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; - i++, result += UDP_HTABLE_SIZE) { - if (result > sysctl_local_port_range[1]) - result = sysctl_local_port_range[0] - + ((result - sysctl_local_port_range[0]) & - (UDP_HTABLE_SIZE - 1)); - if (! __udp_lib_lport_inuse(result, udptable)) - break; + + /* 2nd pass: find hole in shortest hash chain */ + rover = best; + for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) { + if (! __udp_lib_lport_inuse(rover, udptable)) + goto gotit; + rover += UDP_HTABLE_SIZE; + if (rover > high) + rover = low + ((rover - low) + & (UDP_HTABLE_SIZE - 1)); } - if (i >= (1 << 16) / UDP_HTABLE_SIZE) - goto fail; + + + /* All ports in use! 
*/ + goto fail; + gotit: - *port_rover = snum = result; + snum = rover; } else { head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; @@ -201,6 +206,7 @@ gotit: (*saddr_comp)(sk, sk2) ) goto fail; } + inet_sk(sk)->num = snum; sk->sk_hash = snum; if (sk_unhashed(sk)) { @@ -217,7 +223,7 @@ fail: int udp_get_port(struct sock *sk, unsigned short snum, int (*scmp)(const struct sock *, const struct sock *)) { - return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp); + return __udp_lib_get_port(sk, snum, udp_hash, scmp); } int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) diff -puN net/ipv4/udp_impl.h~git-net net/ipv4/udp_impl.h --- a/net/ipv4/udp_impl.h~git-net +++ a/net/ipv4/udp_impl.h @@ -9,7 +9,7 @@ extern int __udp4_lib_rcv(struct sk_bu extern void __udp4_lib_err(struct sk_buff *, u32, struct hlist_head []); extern int __udp_lib_get_port(struct sock *sk, unsigned short snum, - struct hlist_head udptable[], int *port_rover, + struct hlist_head udptable[], int (*)(const struct sock*,const struct sock*)); extern int ipv4_rcv_saddr_equal(const struct sock *, const struct sock *); diff -puN net/ipv4/udplite.c~git-net net/ipv4/udplite.c --- a/net/ipv4/udplite.c~git-net +++ a/net/ipv4/udplite.c @@ -16,12 +16,11 @@ DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics) __read_mostly; struct hlist_head udplite_hash[UDP_HTABLE_SIZE]; -static int udplite_port_rover; int udplite_get_port(struct sock *sk, unsigned short p, int (*c)(const struct sock *, const struct sock *)) { - return __udp_lib_get_port(sk, p, udplite_hash, &udplite_port_rover, c); + return __udp_lib_get_port(sk, p, udplite_hash, c); } static int udplite_v4_get_port(struct sock *sk, unsigned short snum) diff -puN net/ipv6/inet6_connection_sock.c~git-net net/ipv6/inet6_connection_sock.c --- a/net/ipv6/inet6_connection_sock.c~git-net +++ a/net/ipv6/inet6_connection_sock.c @@ -139,6 +139,36 @@ void inet6_csk_addr2sockaddr(struct sock EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); +static 
inline +void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst, + struct in6_addr *daddr, struct in6_addr *saddr) +{ + struct rt6_info *rt = (struct rt6_info *)dst; + + __ip6_dst_store(sk, dst, daddr, saddr); + rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid); +} + +static inline +struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) +{ + struct dst_entry *dst; + struct rt6_info *rt; + + dst = __sk_dst_check(sk, cookie); + if (!dst) + goto end; + + rt = (struct rt6_info *)dst; + if (rt->rt6i_flow_cache_genid != atomic_read(&flow_cache_genid)) { + sk->sk_dst_cache = NULL; + dst_release(dst); + dst = NULL; + } + end: + return dst; +} + int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) { struct sock *sk = skb->sk; @@ -166,7 +196,7 @@ int inet6_csk_xmit(struct sk_buff *skb, final_p = &final; } - dst = __sk_dst_check(sk, np->dst_cookie); + dst = __inet6_csk_dst_check(sk, np->dst_cookie); if (dst == NULL) { int err = ip6_dst_lookup(sk, &dst, &fl); @@ -186,7 +216,7 @@ int inet6_csk_xmit(struct sk_buff *skb, return err; } - __ip6_dst_store(sk, dst, NULL, NULL); + __inet6_csk_dst_store(sk, dst, NULL, NULL); } skb->dst = dst_clone(dst); diff -puN net/ipv6/ip6_output.c~git-net net/ipv6/ip6_output.c --- a/net/ipv6/ip6_output.c~git-net +++ a/net/ipv6/ip6_output.c @@ -441,8 +441,10 @@ int ip6_forward(struct sk_buff *skb) /* IPv6 specs say nothing about it, but it is clear that we cannot send redirects to source routed frames. + We don't send redirects to frames decapsulated from IPsec. 
*/ - if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) { + if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 && + !skb->sp) { struct in6_addr *target = NULL; struct rt6_info *rt; struct neighbour *n = dst->neighbour; diff -puN net/ipv6/ipv6_sockglue.c~git-net net/ipv6/ipv6_sockglue.c --- a/net/ipv6/ipv6_sockglue.c~git-net +++ a/net/ipv6/ipv6_sockglue.c @@ -249,7 +249,7 @@ static int do_ipv6_setsockopt(struct soc } if (ipv6_only_sock(sk) || - !(ipv6_addr_type(&np->daddr) & IPV6_ADDR_MAPPED)) { + !ipv6_addr_v4mapped(&np->daddr)) { retv = -EADDRNOTAVAIL; break; } diff -puN net/ipv6/tcp_ipv6.c~git-net net/ipv6/tcp_ipv6.c --- a/net/ipv6/tcp_ipv6.c~git-net +++ a/net/ipv6/tcp_ipv6.c @@ -697,7 +697,7 @@ static int tcp_v6_parse_md5_keys (struct if (!cmd.tcpm_keylen) { if (!tcp_sk(sk)->md5sig_info) return -ENOENT; - if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_MAPPED) + if (ipv6_addr_v4mapped(&sin6->sin6_addr)) return tcp_v4_md5_do_del(sk, sin6->sin6_addr.s6_addr32[3]); return tcp_v6_md5_do_del(sk, &sin6->sin6_addr); } @@ -720,7 +720,7 @@ static int tcp_v6_parse_md5_keys (struct newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); if (!newkey) return -ENOMEM; - if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_MAPPED) { + if (ipv6_addr_v4mapped(&sin6->sin6_addr)) { return tcp_v4_md5_do_add(sk, sin6->sin6_addr.s6_addr32[3], newkey, cmd.tcpm_keylen); } diff -puN net/ipv6/udp.c~git-net net/ipv6/udp.c --- a/net/ipv6/udp.c~git-net +++ a/net/ipv6/udp.c @@ -610,7 +610,7 @@ int udpv6_sendmsg(struct kiocb *iocb, st daddr = NULL; if (daddr) { - if (ipv6_addr_type(daddr) == IPV6_ADDR_MAPPED) { + if (ipv6_addr_v4mapped(daddr)) { struct sockaddr_in sin; sin.sin_family = AF_INET; sin.sin_port = sin6 ? 
sin6->sin6_port : inet->dport; diff -puN net/irda/af_irda.c~git-net net/irda/af_irda.c --- a/net/irda/af_irda.c~git-net +++ a/net/irda/af_irda.c @@ -1245,18 +1245,17 @@ static int irda_sendmsg(struct kiocb *io struct sock *sk = sock->sk; struct irda_sock *self; struct sk_buff *skb; - int err; + int err = -EPIPE; IRDA_DEBUG(4, "%s(), len=%zd\n", __FUNCTION__, len); /* Note : socket.c set MSG_EOR on SEQPACKET sockets */ - if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT)) + if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_EOR | MSG_CMSG_COMPAT | + MSG_NOSIGNAL)) return -EINVAL; - if (sk->sk_shutdown & SEND_SHUTDOWN) { - send_sig(SIGPIPE, current, 0); - return -EPIPE; - } + if (sk->sk_shutdown & SEND_SHUTDOWN) + goto out_err; if (sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; @@ -1283,7 +1282,7 @@ static int irda_sendmsg(struct kiocb *io skb = sock_alloc_send_skb(sk, len + self->max_header_size + 16, msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) - return -ENOBUFS; + goto out_err; skb_reserve(skb, self->max_header_size + 16); skb_reset_transport_header(skb); @@ -1291,7 +1290,7 @@ static int irda_sendmsg(struct kiocb *io err = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len); if (err) { kfree_skb(skb); - return err; + goto out_err; } /* @@ -1301,10 +1300,14 @@ static int irda_sendmsg(struct kiocb *io err = irttp_data_request(self->tsap, skb); if (err) { IRDA_DEBUG(0, "%s(), err=%d\n", __FUNCTION__, err); - return err; + goto out_err; } /* Tell client how much data we actually sent */ return len; + + out_err: + return sk_stream_error(sk, msg->msg_flags, err); + } /* diff -puN net/key/af_key.c~git-net net/key/af_key.c --- a/net/key/af_key.c~git-net +++ a/net/key/af_key.c @@ -352,16 +352,14 @@ static int verify_address_len(void *p) switch (addr->sa_family) { case AF_INET: - len = sizeof(*sp) + sizeof(*sin) + (sizeof(uint64_t) - 1); - len /= sizeof(uint64_t); + len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin), sizeof(uint64_t)); if (sp->sadb_address_len 
!= len || sp->sadb_address_prefixlen > 32) return -EINVAL; break; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case AF_INET6: - len = sizeof(*sp) + sizeof(*sin6) + (sizeof(uint64_t) - 1); - len /= sizeof(uint64_t); + len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin6), sizeof(uint64_t)); if (sp->sadb_address_len != len || sp->sadb_address_prefixlen > 128) return -EINVAL; @@ -386,14 +384,9 @@ static int verify_address_len(void *p) static inline int pfkey_sec_ctx_len(struct sadb_x_sec_ctx *sec_ctx) { - int len = 0; - - len += sizeof(struct sadb_x_sec_ctx); - len += sec_ctx->sadb_x_ctx_len; - len += sizeof(uint64_t) - 1; - len /= sizeof(uint64_t); - - return len; + return DIV_ROUND_UP(sizeof(struct sadb_x_sec_ctx) + + sec_ctx->sadb_x_ctx_len, + sizeof(uint64_t)); } static inline int verify_sec_ctx_len(void *p) diff -puN net/mac80211/Makefile~git-net net/mac80211/Makefile --- a/net/mac80211/Makefile~git-net +++ a/net/mac80211/Makefile @@ -2,6 +2,7 @@ obj-$(CONFIG_MAC80211) += mac80211.o rc8 mac80211-objs-$(CONFIG_MAC80211_LEDS) += ieee80211_led.o mac80211-objs-$(CONFIG_MAC80211_DEBUGFS) += debugfs.o debugfs_sta.o debugfs_netdev.o debugfs_key.o +mac80211-objs-$(CONFIG_NET_SCHED) += wme.o mac80211-objs := \ ieee80211.o \ @@ -16,6 +17,10 @@ mac80211-objs := \ regdomain.o \ tkip.o \ aes_ccm.o \ - wme.o \ ieee80211_cfg.o \ + rx.o \ + tx.o \ + key.o \ + util.o \ + event.o \ $(mac80211-objs-y) diff -puN net/mac80211/aes_ccm.c~git-net net/mac80211/aes_ccm.c --- a/net/mac80211/aes_ccm.c~git-net +++ a/net/mac80211/aes_ccm.c @@ -7,6 +7,7 @@ * published by the Free Software Foundation. 
*/ +#include #include #include #include @@ -63,7 +64,7 @@ void ieee80211_aes_ccm_encrypt(struct cr s_0 = scratch + AES_BLOCK_LEN; e = scratch + 2 * AES_BLOCK_LEN; - num_blocks = (data_len + AES_BLOCK_LEN - 1) / AES_BLOCK_LEN; + num_blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); last_len = data_len % AES_BLOCK_LEN; aes_ccm_prepare(tfm, b_0, aad, b, s_0, b); @@ -102,7 +103,7 @@ int ieee80211_aes_ccm_decrypt(struct cry s_0 = scratch + AES_BLOCK_LEN; a = scratch + 2 * AES_BLOCK_LEN; - num_blocks = (data_len + AES_BLOCK_LEN - 1) / AES_BLOCK_LEN; + num_blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); last_len = data_len % AES_BLOCK_LEN; aes_ccm_prepare(tfm, b_0, aad, b, s_0, a); diff -puN net/mac80211/debugfs.c~git-net net/mac80211/debugfs.c --- a/net/mac80211/debugfs.c~git-net +++ a/net/mac80211/debugfs.c @@ -86,8 +86,6 @@ DEBUGFS_READONLY_FILE(channel, 20, "%d", local->hw.conf.channel); DEBUGFS_READONLY_FILE(frequency, 20, "%d", local->hw.conf.freq); -DEBUGFS_READONLY_FILE(radar_detect, 20, "%d", - local->hw.conf.radar_detect); DEBUGFS_READONLY_FILE(antenna_sel_tx, 20, "%d", local->hw.conf.antenna_sel_tx); DEBUGFS_READONLY_FILE(antenna_sel_rx, 20, "%d", @@ -110,9 +108,6 @@ DEBUGFS_READONLY_FILE(mode, 20, "%s", ieee80211_mode_str(local->hw.conf.phymode)); DEBUGFS_READONLY_FILE(wep_iv, 20, "%#06x", local->wep_iv & 0xffffff); -DEBUGFS_READONLY_FILE(tx_power_reduction, 20, "%d.%d dBm", - local->hw.conf.tx_power_reduction / 10, - local->hw.conf.tx_power_reduction % 10); DEBUGFS_READONLY_FILE(rate_ctrl_alg, 100, "%s", local->rate_ctrl ? 
local->rate_ctrl->ops->name : ""); @@ -305,7 +300,6 @@ void debugfs_hw_add(struct ieee80211_loc DEBUGFS_ADD(channel); DEBUGFS_ADD(frequency); - DEBUGFS_ADD(radar_detect); DEBUGFS_ADD(antenna_sel_tx); DEBUGFS_ADD(antenna_sel_rx); DEBUGFS_ADD(bridge_packets); @@ -317,7 +311,6 @@ void debugfs_hw_add(struct ieee80211_loc DEBUGFS_ADD(total_ps_buffered); DEBUGFS_ADD(mode); DEBUGFS_ADD(wep_iv); - DEBUGFS_ADD(tx_power_reduction); DEBUGFS_ADD(modes); statsd = debugfs_create_dir("statistics", phyd); @@ -370,7 +363,6 @@ void debugfs_hw_del(struct ieee80211_loc { DEBUGFS_DEL(channel); DEBUGFS_DEL(frequency); - DEBUGFS_DEL(radar_detect); DEBUGFS_DEL(antenna_sel_tx); DEBUGFS_DEL(antenna_sel_rx); DEBUGFS_DEL(bridge_packets); @@ -382,7 +374,6 @@ void debugfs_hw_del(struct ieee80211_loc DEBUGFS_DEL(total_ps_buffered); DEBUGFS_DEL(mode); DEBUGFS_DEL(wep_iv); - DEBUGFS_DEL(tx_power_reduction); DEBUGFS_DEL(modes); DEBUGFS_STATS_DEL(transmitted_fragment_count); diff -puN net/mac80211/debugfs_key.c~git-net net/mac80211/debugfs_key.c --- a/net/mac80211/debugfs_key.c~git-net +++ a/net/mac80211/debugfs_key.c @@ -14,17 +14,18 @@ #include "debugfs.h" #include "debugfs_key.h" -#define KEY_READ(name, buflen, format_string) \ +#define KEY_READ(name, prop, buflen, format_string) \ static ssize_t key_##name##_read(struct file *file, \ char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ char buf[buflen]; \ struct ieee80211_key *key = file->private_data; \ - int res = scnprintf(buf, buflen, format_string, key->name); \ + int res = scnprintf(buf, buflen, format_string, key->prop); \ return simple_read_from_buffer(userbuf, count, ppos, buf, res); \ } -#define KEY_READ_D(name) KEY_READ(name, 20, "%d\n") +#define KEY_READ_D(name) KEY_READ(name, name, 20, "%d\n") +#define KEY_READ_X(name) KEY_READ(name, name, 20, "0x%x\n") #define KEY_OPS(name) \ static const struct file_operations key_ ##name## _ops = { \ @@ -36,11 +37,27 @@ static const struct file_operations key_ KEY_READ_##format(name) \ 
KEY_OPS(name) -KEY_FILE(keylen, D); -KEY_FILE(force_sw_encrypt, D); -KEY_FILE(keyidx, D); -KEY_FILE(hw_key_idx, D); +#define KEY_CONF_READ(name, buflen, format_string) \ + KEY_READ(conf_##name, conf.name, buflen, format_string) +#define KEY_CONF_READ_D(name) KEY_CONF_READ(name, 20, "%d\n") + +#define KEY_CONF_OPS(name) \ +static const struct file_operations key_ ##name## _ops = { \ + .read = key_conf_##name##_read, \ + .open = mac80211_open_file_generic, \ +} + +#define KEY_CONF_FILE(name, format) \ + KEY_CONF_READ_##format(name) \ + KEY_CONF_OPS(name) + +KEY_CONF_FILE(keylen, D); +KEY_CONF_FILE(keyidx, D); +KEY_CONF_FILE(hw_key_idx, D); +KEY_FILE(flags, X); KEY_FILE(tx_rx_count, D); +KEY_READ(ifindex, sdata->dev->ifindex, 20, "%d\n"); +KEY_OPS(ifindex); static ssize_t key_algorithm_read(struct file *file, char __user *userbuf, @@ -49,7 +66,7 @@ static ssize_t key_algorithm_read(struct char *alg; struct ieee80211_key *key = file->private_data; - switch (key->alg) { + switch (key->conf.alg) { case ALG_WEP: alg = "WEP\n"; break; @@ -74,17 +91,20 @@ static ssize_t key_tx_spec_read(struct f int len; struct ieee80211_key *key = file->private_data; - switch (key->alg) { + switch (key->conf.alg) { case ALG_WEP: len = scnprintf(buf, sizeof(buf), "\n"); + break; case ALG_TKIP: len = scnprintf(buf, sizeof(buf), "%08x %04x\n", key->u.tkip.iv32, key->u.tkip.iv16); + break; case ALG_CCMP: tpn = key->u.ccmp.tx_pn; len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]); + break; default: return 0; } @@ -100,9 +120,10 @@ static ssize_t key_rx_spec_read(struct f int i, len; const u8 *rpn; - switch (key->alg) { + switch (key->conf.alg) { case ALG_WEP: len = scnprintf(buf, sizeof(buf), "\n"); + break; case ALG_TKIP: for (i = 0; i < NUM_RX_DATA_QUEUES; i++) p += scnprintf(p, sizeof(buf)+buf-p, @@ -110,6 +131,7 @@ static ssize_t key_rx_spec_read(struct f key->u.tkip.iv32_rx[i], key->u.tkip.iv16_rx[i]); len = p - buf; + break; 
case ALG_CCMP: for (i = 0; i < NUM_RX_DATA_QUEUES; i++) { rpn = key->u.ccmp.rx_pn[i]; @@ -119,6 +141,7 @@ static ssize_t key_rx_spec_read(struct f rpn[3], rpn[4], rpn[5]); } len = p - buf; + break; default: return 0; } @@ -133,7 +156,7 @@ static ssize_t key_replays_read(struct f char buf[20]; int len; - if (key->alg != ALG_CCMP) + if (key->conf.alg != ALG_CCMP) return 0; len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays); return simple_read_from_buffer(userbuf, count, ppos, buf, len); @@ -144,12 +167,12 @@ static ssize_t key_key_read(struct file size_t count, loff_t *ppos) { struct ieee80211_key *key = file->private_data; - int i, res, bufsize = 2*key->keylen+2; + int i, res, bufsize = 2 * key->conf.keylen + 2; char *buf = kmalloc(bufsize, GFP_KERNEL); char *p = buf; - for (i = 0; i < key->keylen; i++) - p += scnprintf(p, bufsize+buf-p, "%02x", key->key[i]); + for (i = 0; i < key->conf.keylen; i++) + p += scnprintf(p, bufsize + buf - p, "%02x", key->conf.key[i]); p += scnprintf(p, bufsize+buf-p, "\n"); res = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); kfree(buf); @@ -164,12 +187,14 @@ KEY_OPS(key); void ieee80211_debugfs_key_add(struct ieee80211_local *local, struct ieee80211_key *key) { + static int keycount; char buf[20]; if (!local->debugfs.keys) return; - sprintf(buf, "%d", key->keyidx); + sprintf(buf, "%d", keycount); + keycount++; key->debugfs.dir = debugfs_create_dir(buf, local->debugfs.keys); @@ -177,7 +202,7 @@ void ieee80211_debugfs_key_add(struct ie return; DEBUGFS_ADD(keylen); - DEBUGFS_ADD(force_sw_encrypt); + DEBUGFS_ADD(flags); DEBUGFS_ADD(keyidx); DEBUGFS_ADD(hw_key_idx); DEBUGFS_ADD(tx_rx_count); @@ -186,6 +211,7 @@ void ieee80211_debugfs_key_add(struct ie DEBUGFS_ADD(rx_spec); DEBUGFS_ADD(replays); DEBUGFS_ADD(key); + DEBUGFS_ADD(ifindex); }; #define DEBUGFS_DEL(name) \ @@ -197,7 +223,7 @@ void ieee80211_debugfs_key_remove(struct return; DEBUGFS_DEL(keylen); - DEBUGFS_DEL(force_sw_encrypt); + DEBUGFS_DEL(flags); 
DEBUGFS_DEL(keyidx); DEBUGFS_DEL(hw_key_idx); DEBUGFS_DEL(tx_rx_count); @@ -206,6 +232,7 @@ void ieee80211_debugfs_key_remove(struct DEBUGFS_DEL(rx_spec); DEBUGFS_DEL(replays); DEBUGFS_DEL(key); + DEBUGFS_DEL(ifindex); debugfs_remove(key->debugfs.stalink); key->debugfs.stalink = NULL; @@ -219,7 +246,7 @@ void ieee80211_debugfs_key_add_default(s if (!sdata->debugfsdir) return; - sprintf(buf, "../keys/%d", sdata->default_key->keyidx); + sprintf(buf, "../keys/%d", sdata->default_key->conf.keyidx); sdata->debugfs.default_key = debugfs_create_symlink("default_key", sdata->debugfsdir, buf); } @@ -239,7 +266,7 @@ void ieee80211_debugfs_key_sta_link(stru if (!key->debugfs.dir) return; - sprintf(buf, "../sta/" MAC_FMT, MAC_ARG(sta->addr)); + sprintf(buf, "../../stations/" MAC_FMT, MAC_ARG(sta->addr)); key->debugfs.stalink = debugfs_create_symlink("station", key->debugfs.dir, buf); } diff -puN net/mac80211/debugfs_netdev.c~git-net net/mac80211/debugfs_netdev.c --- a/net/mac80211/debugfs_netdev.c~git-net +++ a/net/mac80211/debugfs_netdev.c @@ -112,13 +112,13 @@ static ssize_t ieee80211_if_fmt_flags( const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) { return scnprintf(buf, buflen, "%s%s%s%s%s%s%s\n", - sdata->u.sta.ssid_set ? "SSID\n" : "", - sdata->u.sta.bssid_set ? "BSSID\n" : "", - sdata->u.sta.prev_bssid_set ? "prev BSSID\n" : "", - sdata->u.sta.authenticated ? "AUTH\n" : "", - sdata->u.sta.associated ? "ASSOC\n" : "", - sdata->u.sta.probereq_poll ? "PROBEREQ POLL\n" : "", - sdata->use_protection ? "CTS prot\n" : ""); + sdata->u.sta.flags & IEEE80211_STA_SSID_SET ? "SSID\n" : "", + sdata->u.sta.flags & IEEE80211_STA_BSSID_SET ? "BSSID\n" : "", + sdata->u.sta.flags & IEEE80211_STA_PREV_BSSID_SET ? "prev BSSID\n" : "", + sdata->u.sta.flags & IEEE80211_STA_AUTHENTICATED ? "AUTH\n" : "", + sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED ? "ASSOC\n" : "", + sdata->u.sta.flags & IEEE80211_STA_PROBEREQ_POLL ? 
"PROBEREQ POLL\n" : "", + sdata->flags & IEEE80211_SDATA_USE_PROTECTION ? "CTS prot\n" : ""); } __IEEE80211_IF_FILE(flags); diff -puN net/mac80211/debugfs_sta.c~git-net net/mac80211/debugfs_sta.c --- a/net/mac80211/debugfs_sta.c~git-net +++ a/net/mac80211/debugfs_sta.c @@ -60,7 +60,6 @@ static const struct file_operations sta_ STA_OPS(name) STA_FILE(aid, aid, D); -STA_FILE(key_idx_compression, key_idx_compression, D); STA_FILE(dev, dev->name, S); STA_FILE(vlan_id, vlan_id, D); STA_FILE(rx_packets, rx_packets, LU); diff -puN /dev/null net/mac80211/event.c --- /dev/null +++ a/net/mac80211/event.c @@ -0,0 +1,42 @@ +/* + * Copyright 2007 Johannes Berg + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * mac80211 - events + */ + +#include +#include +#include "ieee80211_i.h" + +/* + * indicate a failed Michael MIC to userspace; the passed packet + * (in the variable hdr) must be long enough to extract the TKIP + * fields like TSC + */ +void mac80211_ev_michael_mic_failure(struct net_device *dev, int keyidx, + struct ieee80211_hdr *hdr) +{ + union iwreq_data wrqu; + char *buf = kmalloc(128, GFP_ATOMIC); + + if (buf) { + /* TODO: needed parameters: count, key type, TSC */ + sprintf(buf, "MLME-MICHAELMICFAILURE.indication(" + "keyid=%d %scast addr=" MAC_FMT ")", + keyidx, hdr->addr1[0] & 0x01 ? 
"broad" : "uni", + MAC_ARG(hdr->addr2)); + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.data.length = strlen(buf); + wireless_send_event(dev, IWEVCUSTOM, &wrqu, buf); + kfree(buf); + } + + /* + * TODO: re-add support for sending MIC failure indication + * with all info via nl80211 + */ +} diff -puN net/mac80211/hostapd_ioctl.h~git-net net/mac80211/hostapd_ioctl.h --- a/net/mac80211/hostapd_ioctl.h~git-net +++ a/net/mac80211/hostapd_ioctl.h @@ -32,31 +32,15 @@ enum { PRISM2_PARAM_PREAMBLE = 1003, PRISM2_PARAM_SHORT_SLOT_TIME = 1006, PRISM2_PARAM_NEXT_MODE = 1008, - PRISM2_PARAM_RADIO_ENABLED = 1010, - PRISM2_PARAM_ANTENNA_MODE = 1013, - PRISM2_PARAM_STAT_TIME = 1016, - PRISM2_PARAM_STA_ANTENNA_SEL = 1017, - PRISM2_PARAM_TX_POWER_REDUCTION = 1022, PRISM2_PARAM_KEY_TX_RX_THRESHOLD = 1024, - PRISM2_PARAM_DEFAULT_WEP_ONLY = 1026, PRISM2_PARAM_WIFI_WME_NOACK_TEST = 1033, PRISM2_PARAM_SCAN_FLAGS = 1035, PRISM2_PARAM_HW_MODES = 1036, PRISM2_PARAM_CREATE_IBSS = 1037, PRISM2_PARAM_WMM_ENABLED = 1038, PRISM2_PARAM_MIXED_CELL = 1039, - PRISM2_PARAM_RADAR_DETECT = 1043, - PRISM2_PARAM_SPECTRUM_MGMT = 1044, }; -enum { - IEEE80211_KEY_MGMT_NONE = 0, - IEEE80211_KEY_MGMT_IEEE8021X = 1, - IEEE80211_KEY_MGMT_WPA_PSK = 2, - IEEE80211_KEY_MGMT_WPA_EAP = 3, -}; - - /* Data structures used for get_hw_features ioctl */ struct hostapd_ioctl_hw_modes_hdr { int mode; @@ -75,26 +59,4 @@ struct ieee80211_rate_data { int flags; /* IEEE80211_RATE_ flags */ }; - -/* ADD_IF, REMOVE_IF, and UPDATE_IF 'type' argument */ -enum { - HOSTAP_IF_WDS = 1, HOSTAP_IF_VLAN = 2, HOSTAP_IF_BSS = 3, - HOSTAP_IF_STA = 4 -}; - -struct hostapd_if_wds { - u8 remote_addr[ETH_ALEN]; -}; - -struct hostapd_if_vlan { - u8 id; -}; - -struct hostapd_if_bss { - u8 bssid[ETH_ALEN]; -}; - -struct hostapd_if_sta { -}; - #endif /* HOSTAPD_IOCTL_H */ diff -puN net/mac80211/ieee80211.c~git-net net/mac80211/ieee80211.c --- a/net/mac80211/ieee80211.c~git-net +++ a/net/mac80211/ieee80211.c @@ -20,42 +20,19 @@ #include #include #include 
-#include -#include #include #include -#include #include "ieee80211_common.h" #include "ieee80211_i.h" #include "ieee80211_rate.h" #include "wep.h" -#include "wpa.h" -#include "tkip.h" #include "wme.h" #include "aes_ccm.h" #include "ieee80211_led.h" #include "ieee80211_cfg.h" #include "debugfs.h" #include "debugfs_netdev.h" -#include "debugfs_key.h" - -/* privid for wiphys to determine whether they belong to us or not */ -void *mac80211_wiphy_privid = &mac80211_wiphy_privid; - -/* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ -/* Ethernet-II snap header (RFC1042 for most EtherTypes) */ -static const unsigned char rfc1042_header[] = - { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 }; - -/* Bridge-Tunnel header (for EtherTypes ETH_P_AARP and ETH_P_IPX) */ -static const unsigned char bridge_tunnel_header[] = - { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; - -/* No encapsulation header if EtherType < 0x600 (=length) */ -static const unsigned char eapol_header[] = - { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00, 0x88, 0x8e }; - /* * For seeing transmitted packets on monitor interfaces @@ -67,164 +44,166 @@ struct ieee80211_tx_status_rtap_hdr { u8 data_retries; } __attribute__ ((packed)); +/* common interface routines */ -static inline void ieee80211_include_sequence(struct ieee80211_sub_if_data *sdata, - struct ieee80211_hdr *hdr) +static struct net_device_stats *ieee80211_get_stats(struct net_device *dev) { - /* Set the sequence number for this frame. */ - hdr->seq_ctrl = cpu_to_le16(sdata->sequence); - - /* Increase the sequence number. 
*/ - sdata->sequence = (sdata->sequence + 0x10) & IEEE80211_SCTL_SEQ; + struct ieee80211_sub_if_data *sdata; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + return &(sdata->stats); } -struct ieee80211_key_conf * -ieee80211_key_data2conf(struct ieee80211_local *local, - const struct ieee80211_key *data) +static int header_parse_80211(struct sk_buff *skb, unsigned char *haddr) { - struct ieee80211_key_conf *conf; - - conf = kmalloc(sizeof(*conf) + data->keylen, GFP_ATOMIC); - if (!conf) - return NULL; - - conf->hw_key_idx = data->hw_key_idx; - conf->alg = data->alg; - conf->keylen = data->keylen; - conf->flags = 0; - if (data->force_sw_encrypt) - conf->flags |= IEEE80211_KEY_FORCE_SW_ENCRYPT; - conf->keyidx = data->keyidx; - if (data->default_tx_key) - conf->flags |= IEEE80211_KEY_DEFAULT_TX_KEY; - if (local->default_wep_only) - conf->flags |= IEEE80211_KEY_DEFAULT_WEP_ONLY; - memcpy(conf->key, data->key, data->keylen); - - return conf; + memcpy(haddr, skb_mac_header(skb) + 10, ETH_ALEN); /* addr2 */ + return ETH_ALEN; } -struct ieee80211_key *ieee80211_key_alloc(struct ieee80211_sub_if_data *sdata, - int idx, size_t key_len, gfp_t flags) -{ - struct ieee80211_key *key; - - key = kzalloc(sizeof(struct ieee80211_key) + key_len, flags); - if (!key) - return NULL; - kref_init(&key->kref); - return key; -} +/* master interface */ -static void ieee80211_key_release(struct kref *kref) +static int ieee80211_master_open(struct net_device *dev) { - struct ieee80211_key *key; - - key = container_of(kref, struct ieee80211_key, kref); - if (key->alg == ALG_CCMP) - ieee80211_aes_key_free(key->u.ccmp.tfm); - ieee80211_debugfs_key_remove(key); - kfree(key); -} + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_sub_if_data *sdata; + int res = -EOPNOTSUPP; -void ieee80211_key_free(struct ieee80211_key *key) -{ - if (key) - kref_put(&key->kref, ieee80211_key_release); + read_lock(&local->sub_if_lock); + list_for_each_entry(sdata, &local->sub_if_list, list) { 
+ if (sdata->dev != dev && netif_running(sdata->dev)) { + res = 0; + break; + } + } + read_unlock(&local->sub_if_lock); + return res; } -static int rate_list_match(const int *rate_list, int rate) +static int ieee80211_master_stop(struct net_device *dev) { - int i; - - if (!rate_list) - return 0; + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_sub_if_data *sdata; - for (i = 0; rate_list[i] >= 0; i++) - if (rate_list[i] == rate) - return 1; + read_lock(&local->sub_if_lock); + list_for_each_entry(sdata, &local->sub_if_list, list) + if (sdata->dev != dev && netif_running(sdata->dev)) + dev_close(sdata->dev); + read_unlock(&local->sub_if_lock); return 0; } +/* management interface */ -void ieee80211_prepare_rates(struct ieee80211_local *local, - struct ieee80211_hw_mode *mode) +static void +ieee80211_fill_frame_info(struct ieee80211_local *local, + struct ieee80211_frame_info *fi, + struct ieee80211_rx_status *status) { - int i; - - for (i = 0; i < mode->num_rates; i++) { - struct ieee80211_rate *rate = &mode->rates[i]; - - rate->flags &= ~(IEEE80211_RATE_SUPPORTED | - IEEE80211_RATE_BASIC); - - if (local->supp_rates[mode->mode]) { - if (!rate_list_match(local->supp_rates[mode->mode], - rate->rate)) - continue; - } - - rate->flags |= IEEE80211_RATE_SUPPORTED; + if (status) { + struct timespec ts; + struct ieee80211_rate *rate; - /* Use configured basic rate set if it is available. If not, - * use defaults that are sane for most cases. 
*/ - if (local->basic_rates[mode->mode]) { - if (rate_list_match(local->basic_rates[mode->mode], - rate->rate)) - rate->flags |= IEEE80211_RATE_BASIC; - } else switch (mode->mode) { + jiffies_to_timespec(jiffies, &ts); + fi->hosttime = cpu_to_be64((u64) ts.tv_sec * 1000000 + + ts.tv_nsec / 1000); + fi->mactime = cpu_to_be64(status->mactime); + switch (status->phymode) { case MODE_IEEE80211A: - if (rate->rate == 60 || rate->rate == 120 || - rate->rate == 240) - rate->flags |= IEEE80211_RATE_BASIC; + fi->phytype = htonl(ieee80211_phytype_ofdm_dot11_a); break; case MODE_IEEE80211B: - if (rate->rate == 10 || rate->rate == 20) - rate->flags |= IEEE80211_RATE_BASIC; - break; - case MODE_ATHEROS_TURBO: - if (rate->rate == 120 || rate->rate == 240 || - rate->rate == 480) - rate->flags |= IEEE80211_RATE_BASIC; + fi->phytype = htonl(ieee80211_phytype_dsss_dot11_b); break; case MODE_IEEE80211G: - if (rate->rate == 10 || rate->rate == 20 || - rate->rate == 55 || rate->rate == 110) - rate->flags |= IEEE80211_RATE_BASIC; - break; - } - - /* Set ERP and MANDATORY flags based on phymode */ - switch (mode->mode) { - case MODE_IEEE80211A: - if (rate->rate == 60 || rate->rate == 120 || - rate->rate == 240) - rate->flags |= IEEE80211_RATE_MANDATORY; - break; - case MODE_IEEE80211B: - if (rate->rate == 10) - rate->flags |= IEEE80211_RATE_MANDATORY; + fi->phytype = htonl(ieee80211_phytype_pbcc_dot11_g); break; case MODE_ATHEROS_TURBO: + fi->phytype = + htonl(ieee80211_phytype_dsss_dot11_turbo); break; - case MODE_IEEE80211G: - if (rate->rate == 10 || rate->rate == 20 || - rate->rate == 55 || rate->rate == 110 || - rate->rate == 60 || rate->rate == 120 || - rate->rate == 240) - rate->flags |= IEEE80211_RATE_MANDATORY; + default: + fi->phytype = htonl(0xAAAAAAAA); break; } - if (ieee80211_is_erp_rate(mode->mode, rate->rate)) - rate->flags |= IEEE80211_RATE_ERP; + fi->channel = htonl(status->channel); + rate = ieee80211_get_rate(local, status->phymode, + status->rate); + if (rate) { + 
fi->datarate = htonl(rate->rate); + if (rate->flags & IEEE80211_RATE_PREAMBLE2) { + if (status->rate == rate->val) + fi->preamble = htonl(2); /* long */ + else if (status->rate == rate->val2) + fi->preamble = htonl(1); /* short */ + } else + fi->preamble = htonl(0); + } else { + fi->datarate = htonl(0); + fi->preamble = htonl(0); + } + + fi->antenna = htonl(status->antenna); + fi->priority = htonl(0xffffffff); /* no clue */ + fi->ssi_type = htonl(ieee80211_ssi_raw); + fi->ssi_signal = htonl(status->ssi); + fi->ssi_noise = 0x00000000; + fi->encoding = 0; + } else { + /* clear everything because we really don't know. + * the msg_type field isn't present on monitor frames + * so we don't know whether it will be present or not, + * but it's ok to not clear it since it'll be assigned + * anyway */ + memset(fi, 0, sizeof(*fi) - sizeof(fi->msg_type)); + + fi->ssi_type = htonl(ieee80211_ssi_none); } + fi->version = htonl(IEEE80211_FI_VERSION); + fi->length = cpu_to_be32(sizeof(*fi) - sizeof(fi->msg_type)); } +/* this routine is actually not just for this, but also + * for pushing fake 'management' frames into userspace. + * it shall be replaced by a netlink-based system. 
*/ +void +ieee80211_rx_mgmt(struct ieee80211_local *local, struct sk_buff *skb, + struct ieee80211_rx_status *status, u32 msg_type) +{ + struct ieee80211_frame_info *fi; + const size_t hlen = sizeof(struct ieee80211_frame_info); + struct ieee80211_sub_if_data *sdata; + + skb->dev = local->apdev; + + sdata = IEEE80211_DEV_TO_SUB_IF(local->apdev); + + if (skb_headroom(skb) < hlen) { + I802_DEBUG_INC(local->rx_expand_skb_head); + if (pskb_expand_head(skb, hlen, 0, GFP_ATOMIC)) { + dev_kfree_skb(skb); + return; + } + } + + fi = (struct ieee80211_frame_info *) skb_push(skb, hlen); + + ieee80211_fill_frame_info(local, fi, status); + fi->msg_type = htonl(msg_type); + + sdata->stats.rx_packets++; + sdata->stats.rx_bytes += skb->len; + + skb_set_mac_header(skb, 0); + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->pkt_type = PACKET_OTHERHOST; + skb->protocol = htons(ETH_P_802_2); + memset(skb->cb, 0, sizeof(skb->cb)); + netif_rx(skb); +} -static void ieee80211_key_threshold_notify(struct net_device *dev, - struct ieee80211_key *key, - struct sta_info *sta) +void ieee80211_key_threshold_notify(struct net_device *dev, + struct ieee80211_key *key, + struct sta_info *sta) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct sk_buff *skb; @@ -257,4077 +236,557 @@ static void ieee80211_key_threshold_noti ieee80211_msg_key_threshold_notification); } - -static u8 * ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len) +static int ieee80211_mgmt_open(struct net_device *dev) { - u16 fc; - - if (len < 24) - return NULL; - - fc = le16_to_cpu(hdr->frame_control); - - switch (fc & IEEE80211_FCTL_FTYPE) { - case IEEE80211_FTYPE_DATA: - switch (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { - case IEEE80211_FCTL_TODS: - return hdr->addr1; - case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): - return NULL; - case IEEE80211_FCTL_FROMDS: - return hdr->addr2; - case 0: - return hdr->addr3; - } - break; - case IEEE80211_FTYPE_MGMT: - return hdr->addr3; - case 
IEEE80211_FTYPE_CTL: - if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL) - return hdr->addr1; - else - return NULL; - } + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - return NULL; + if (!netif_running(local->mdev)) + return -EOPNOTSUPP; + return 0; } -int ieee80211_get_hdrlen(u16 fc) +static int ieee80211_mgmt_stop(struct net_device *dev) { - int hdrlen = 24; - - switch (fc & IEEE80211_FCTL_FTYPE) { - case IEEE80211_FTYPE_DATA: - if ((fc & IEEE80211_FCTL_FROMDS) && (fc & IEEE80211_FCTL_TODS)) - hdrlen = 30; /* Addr4 */ - /* - * The QoS Control field is two bytes and its presence is - * indicated by the IEEE80211_STYPE_QOS_DATA bit. Add 2 to - * hdrlen if that bit is set. - * This works by masking out the bit and shifting it to - * bit position 1 so the result has the value 0 or 2. - */ - hdrlen += (fc & IEEE80211_STYPE_QOS_DATA) - >> (ilog2(IEEE80211_STYPE_QOS_DATA)-1); - break; - case IEEE80211_FTYPE_CTL: - /* - * ACK and CTS are 10 bytes, all others 16. To see how - * to get this condition consider - * subtype mask: 0b0000000011110000 (0x00F0) - * ACK subtype: 0b0000000011010000 (0x00D0) - * CTS subtype: 0b0000000011000000 (0x00C0) - * bits that matter: ^^^ (0x00E0) - * value of those: 0b0000000011000000 (0x00C0) - */ - if ((fc & 0xE0) == 0xC0) - hdrlen = 10; - else - hdrlen = 16; - break; - } - - return hdrlen; + return 0; } -EXPORT_SYMBOL(ieee80211_get_hdrlen); -int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb) +static int ieee80211_change_mtu_apdev(struct net_device *dev, int new_mtu) { - const struct ieee80211_hdr *hdr = (const struct ieee80211_hdr *) skb->data; - int hdrlen; + /* FIX: what would be proper limits for MTU? + * This interface uses 802.11 frames. 
*/ + if (new_mtu < 256 || new_mtu > IEEE80211_MAX_DATA_LEN) { + printk(KERN_WARNING "%s: invalid MTU %d\n", + dev->name, new_mtu); + return -EINVAL; + } - if (unlikely(skb->len < 10)) - return 0; - hdrlen = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control)); - if (unlikely(hdrlen > skb->len)) - return 0; - return hdrlen; +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: setting MTU %d\n", dev->name, new_mtu); +#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + dev->mtu = new_mtu; + return 0; } -EXPORT_SYMBOL(ieee80211_get_hdrlen_from_skb); -static int ieee80211_get_radiotap_len(struct sk_buff *skb) +void ieee80211_if_mgmt_setup(struct net_device *dev) { - struct ieee80211_radiotap_header *hdr = - (struct ieee80211_radiotap_header *) skb->data; - - return le16_to_cpu(hdr->it_len); + ether_setup(dev); + dev->hard_start_xmit = ieee80211_mgmt_start_xmit; + dev->change_mtu = ieee80211_change_mtu_apdev; + dev->get_stats = ieee80211_get_stats; + dev->open = ieee80211_mgmt_open; + dev->stop = ieee80211_mgmt_stop; + dev->type = ARPHRD_IEEE80211_PRISM; + dev->hard_header_parse = header_parse_80211; + dev->uninit = ieee80211_if_reinit; + dev->destructor = ieee80211_if_free; } -#ifdef CONFIG_MAC80211_LOWTX_FRAME_DUMP -static void ieee80211_dump_frame(const char *ifname, const char *title, - const struct sk_buff *skb) -{ - const struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u16 fc; - int hdrlen; +/* regular interfaces */ - printk(KERN_DEBUG "%s: %s (len=%d)", ifname, title, skb->len); - if (skb->len < 4) { - printk("\n"); - return; +static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) +{ + /* FIX: what would be proper limits for MTU? + * This interface uses 802.3 frames. 
*/ + if (new_mtu < 256 || new_mtu > IEEE80211_MAX_DATA_LEN - 24 - 6) { + printk(KERN_WARNING "%s: invalid MTU %d\n", + dev->name, new_mtu); + return -EINVAL; } - fc = le16_to_cpu(hdr->frame_control); - hdrlen = ieee80211_get_hdrlen(fc); - if (hdrlen > skb->len) - hdrlen = skb->len; - if (hdrlen >= 4) - printk(" FC=0x%04x DUR=0x%04x", - fc, le16_to_cpu(hdr->duration_id)); - if (hdrlen >= 10) - printk(" A1=" MAC_FMT, MAC_ARG(hdr->addr1)); - if (hdrlen >= 16) - printk(" A2=" MAC_FMT, MAC_ARG(hdr->addr2)); - if (hdrlen >= 24) - printk(" A3=" MAC_FMT, MAC_ARG(hdr->addr3)); - if (hdrlen >= 30) - printk(" A4=" MAC_FMT, MAC_ARG(hdr->addr4)); - printk("\n"); -} -#else /* CONFIG_MAC80211_LOWTX_FRAME_DUMP */ -static inline void ieee80211_dump_frame(const char *ifname, const char *title, - struct sk_buff *skb) -{ -} -#endif /* CONFIG_MAC80211_LOWTX_FRAME_DUMP */ - - -static int ieee80211_is_eapol(const struct sk_buff *skb) -{ - const struct ieee80211_hdr *hdr; - u16 fc; - int hdrlen; - - if (unlikely(skb->len < 10)) - return 0; - - hdr = (const struct ieee80211_hdr *) skb->data; - fc = le16_to_cpu(hdr->frame_control); - - if (unlikely(!WLAN_FC_DATA_PRESENT(fc))) - return 0; - - hdrlen = ieee80211_get_hdrlen(fc); - - if (unlikely(skb->len >= hdrlen + sizeof(eapol_header) && - memcmp(skb->data + hdrlen, eapol_header, - sizeof(eapol_header)) == 0)) - return 1; - +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: setting MTU %d\n", dev->name, new_mtu); +#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + dev->mtu = new_mtu; return 0; } - -static ieee80211_txrx_result -ieee80211_tx_h_rate_ctrl(struct ieee80211_txrx_data *tx) +static inline int identical_mac_addr_allowed(int type1, int type2) { - struct rate_control_extra extra; - - memset(&extra, 0, sizeof(extra)); - extra.mode = tx->u.tx.mode; - extra.mgmt_data = tx->sdata && - tx->sdata->type == IEEE80211_IF_TYPE_MGMT; - extra.ethertype = tx->ethertype; - - tx->u.tx.rate = rate_control_get_rate(tx->local, tx->dev, tx->skb, - 
&extra); - if (unlikely(extra.probe != NULL)) { - tx->u.tx.control->flags |= IEEE80211_TXCTL_RATE_CTRL_PROBE; - tx->u.tx.probe_last_frag = 1; - tx->u.tx.control->alt_retry_rate = tx->u.tx.rate->val; - tx->u.tx.rate = extra.probe; - } else { - tx->u.tx.control->alt_retry_rate = -1; - } - if (!tx->u.tx.rate) - return TXRX_DROP; - if (tx->u.tx.mode->mode == MODE_IEEE80211G && - tx->sdata->use_protection && tx->fragmented && - extra.nonerp) { - tx->u.tx.last_frag_rate = tx->u.tx.rate; - tx->u.tx.probe_last_frag = extra.probe ? 1 : 0; - - tx->u.tx.rate = extra.nonerp; - tx->u.tx.control->rate = extra.nonerp; - tx->u.tx.control->flags &= ~IEEE80211_TXCTL_RATE_CTRL_PROBE; - } else { - tx->u.tx.last_frag_rate = tx->u.tx.rate; - tx->u.tx.control->rate = tx->u.tx.rate; - } - tx->u.tx.control->tx_rate = tx->u.tx.rate->val; - if ((tx->u.tx.rate->flags & IEEE80211_RATE_PREAMBLE2) && - tx->local->short_preamble && - (!tx->sta || (tx->sta->flags & WLAN_STA_SHORT_PREAMBLE))) { - tx->u.tx.short_preamble = 1; - tx->u.tx.control->tx_rate = tx->u.tx.rate->val2; - } - - return TXRX_CONTINUE; + return (type1 == IEEE80211_IF_TYPE_MNTR || + type2 == IEEE80211_IF_TYPE_MNTR || + (type1 == IEEE80211_IF_TYPE_AP && + type2 == IEEE80211_IF_TYPE_WDS) || + (type1 == IEEE80211_IF_TYPE_WDS && + (type2 == IEEE80211_IF_TYPE_WDS || + type2 == IEEE80211_IF_TYPE_AP)) || + (type1 == IEEE80211_IF_TYPE_AP && + type2 == IEEE80211_IF_TYPE_VLAN) || + (type1 == IEEE80211_IF_TYPE_VLAN && + (type2 == IEEE80211_IF_TYPE_AP || + type2 == IEEE80211_IF_TYPE_VLAN))); } - -static ieee80211_txrx_result -ieee80211_tx_h_select_key(struct ieee80211_txrx_data *tx) +/* Check if running monitor interfaces should go to a "soft monitor" mode + * and switch them if necessary. 
*/ +static inline void ieee80211_start_soft_monitor(struct ieee80211_local *local) { - if (tx->sta) - tx->u.tx.control->key_idx = tx->sta->key_idx_compression; - else - tx->u.tx.control->key_idx = HW_KEY_IDX_INVALID; + struct ieee80211_if_init_conf conf; - if (unlikely(tx->u.tx.control->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT)) - tx->key = NULL; - else if (tx->sta && tx->sta->key) - tx->key = tx->sta->key; - else if (tx->sdata->default_key) - tx->key = tx->sdata->default_key; - else if (tx->sdata->drop_unencrypted && - !(tx->sdata->eapol && ieee80211_is_eapol(tx->skb))) { - I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted); - return TXRX_DROP; - } else - tx->key = NULL; - - if (tx->key) { - tx->key->tx_rx_count++; - if (unlikely(tx->local->key_tx_rx_threshold && - tx->key->tx_rx_count > - tx->local->key_tx_rx_threshold)) { - ieee80211_key_threshold_notify(tx->dev, tx->key, - tx->sta); - } - } - - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_tx_h_fragment(struct ieee80211_txrx_data *tx) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; - size_t hdrlen, per_fragm, num_fragm, payload_len, left; - struct sk_buff **frags, *first, *frag; - int i; - u16 seq; - u8 *pos; - int frag_threshold = tx->local->fragmentation_threshold; - - if (!tx->fragmented) - return TXRX_CONTINUE; - - first = tx->skb; - - hdrlen = ieee80211_get_hdrlen(tx->fc); - payload_len = first->len - hdrlen; - per_fragm = frag_threshold - hdrlen - FCS_LEN; - num_fragm = (payload_len + per_fragm - 1) / per_fragm; - - frags = kzalloc(num_fragm * sizeof(struct sk_buff *), GFP_ATOMIC); - if (!frags) - goto fail; - - hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREFRAGS); - seq = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_SEQ; - pos = first->data + hdrlen + per_fragm; - left = payload_len - per_fragm; - for (i = 0; i < num_fragm - 1; i++) { - struct ieee80211_hdr *fhdr; - size_t copylen; - - if (left <= 0) - goto fail; - - /* reserve enough extra head 
and tail room for possible - * encryption */ - frag = frags[i] = - dev_alloc_skb(tx->local->tx_headroom + - frag_threshold + - IEEE80211_ENCRYPT_HEADROOM + - IEEE80211_ENCRYPT_TAILROOM); - if (!frag) - goto fail; - /* Make sure that all fragments use the same priority so - * that they end up using the same TX queue */ - frag->priority = first->priority; - skb_reserve(frag, tx->local->tx_headroom + - IEEE80211_ENCRYPT_HEADROOM); - fhdr = (struct ieee80211_hdr *) skb_put(frag, hdrlen); - memcpy(fhdr, first->data, hdrlen); - if (i == num_fragm - 2) - fhdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREFRAGS); - fhdr->seq_ctrl = cpu_to_le16(seq | ((i + 1) & IEEE80211_SCTL_FRAG)); - copylen = left > per_fragm ? per_fragm : left; - memcpy(skb_put(frag, copylen), pos, copylen); - - pos += copylen; - left -= copylen; - } - skb_trim(first, hdrlen + per_fragm); - - tx->u.tx.num_extra_frag = num_fragm - 1; - tx->u.tx.extra_frag = frags; - - return TXRX_CONTINUE; - - fail: - printk(KERN_DEBUG "%s: failed to fragment frame\n", tx->dev->name); - if (frags) { - for (i = 0; i < num_fragm - 1; i++) - if (frags[i]) - dev_kfree_skb(frags[i]); - kfree(frags); - } - I802_DEBUG_INC(tx->local->tx_handlers_drop_fragment); - return TXRX_DROP; -} - - -static int wep_encrypt_skb(struct ieee80211_txrx_data *tx, struct sk_buff *skb) -{ - if (tx->key->force_sw_encrypt) { - if (ieee80211_wep_encrypt(tx->local, skb, tx->key)) - return -1; - } else { - tx->u.tx.control->key_idx = tx->key->hw_key_idx; - if (tx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) { - if (ieee80211_wep_add_iv(tx->local, skb, tx->key) == - NULL) - return -1; - } - } - return 0; -} - - -void ieee80211_tx_set_iswep(struct ieee80211_txrx_data *tx) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; - - hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); - if (tx->u.tx.extra_frag) { - struct ieee80211_hdr *fhdr; - int i; - for (i = 0; i < tx->u.tx.num_extra_frag; i++) { - fhdr = (struct 
ieee80211_hdr *) - tx->u.tx.extra_frag[i]->data; - fhdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); - } - } -} - - -static ieee80211_txrx_result -ieee80211_tx_h_wep_encrypt(struct ieee80211_txrx_data *tx) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; - u16 fc; - - fc = le16_to_cpu(hdr->frame_control); - - if (!tx->key || tx->key->alg != ALG_WEP || - ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA && - ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || - (fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH))) - return TXRX_CONTINUE; - - tx->u.tx.control->iv_len = WEP_IV_LEN; - tx->u.tx.control->icv_len = WEP_ICV_LEN; - ieee80211_tx_set_iswep(tx); - - if (wep_encrypt_skb(tx, tx->skb) < 0) { - I802_DEBUG_INC(tx->local->tx_handlers_drop_wep); - return TXRX_DROP; - } - - if (tx->u.tx.extra_frag) { - int i; - for (i = 0; i < tx->u.tx.num_extra_frag; i++) { - if (wep_encrypt_skb(tx, tx->u.tx.extra_frag[i]) < 0) { - I802_DEBUG_INC(tx->local-> - tx_handlers_drop_wep); - return TXRX_DROP; - } - } - } - - return TXRX_CONTINUE; -} - - -static int ieee80211_frame_duration(struct ieee80211_local *local, size_t len, - int rate, int erp, int short_preamble) -{ - int dur; - - /* calculate duration (in microseconds, rounded up to next higher - * integer if it includes a fractional microsecond) to send frame of - * len bytes (does not include FCS) at the given rate. Duration will - * also include SIFS. - * - * rate is in 100 kbps, so divident is multiplied by 10 in the - * DIV_ROUND_UP() operations. 
- */ - - if (local->hw.conf.phymode == MODE_IEEE80211A || erp || - local->hw.conf.phymode == MODE_ATHEROS_TURBO) { - /* - * OFDM: - * - * N_DBPS = DATARATE x 4 - * N_SYM = Ceiling((16+8xLENGTH+6) / N_DBPS) - * (16 = SIGNAL time, 6 = tail bits) - * TXTIME = T_PREAMBLE + T_SIGNAL + T_SYM x N_SYM + Signal Ext - * - * T_SYM = 4 usec - * 802.11a - 17.5.2: aSIFSTime = 16 usec - * 802.11g - 19.8.4: aSIFSTime = 10 usec + - * signal ext = 6 usec - */ - /* FIX: Atheros Turbo may have different (shorter) duration? */ - dur = 16; /* SIFS + signal ext */ - dur += 16; /* 17.3.2.3: T_PREAMBLE = 16 usec */ - dur += 4; /* 17.3.2.3: T_SIGNAL = 4 usec */ - dur += 4 * DIV_ROUND_UP((16 + 8 * (len + 4) + 6) * 10, - 4 * rate); /* T_SYM x N_SYM */ - } else { - /* - * 802.11b or 802.11g with 802.11b compatibility: - * 18.3.4: TXTIME = PreambleLength + PLCPHeaderTime + - * Ceiling(((LENGTH+PBCC)x8)/DATARATE). PBCC=0. - * - * 802.11 (DS): 15.3.3, 802.11b: 18.3.4 - * aSIFSTime = 10 usec - * aPreambleLength = 144 usec or 72 usec with short preamble - * aPLCPHeaderLength = 48 usec or 24 usec with short preamble - */ - dur = 10; /* aSIFSTime = 10 usec */ - dur += short_preamble ? 
(72 + 24) : (144 + 48); - - dur += DIV_ROUND_UP(8 * (len + 4) * 10, rate); - } - - return dur; -} - - -/* Exported duration function for driver use */ -__le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, - size_t frame_len, int rate) -{ - struct ieee80211_local *local = hw_to_local(hw); - u16 dur; - int erp; - - erp = ieee80211_is_erp_rate(hw->conf.phymode, rate); - dur = ieee80211_frame_duration(local, frame_len, rate, - erp, local->short_preamble); - - return cpu_to_le16(dur); -} -EXPORT_SYMBOL(ieee80211_generic_frame_duration); - - -static u16 ieee80211_duration(struct ieee80211_txrx_data *tx, int group_addr, - int next_frag_len) -{ - int rate, mrate, erp, dur, i; - struct ieee80211_rate *txrate = tx->u.tx.rate; - struct ieee80211_local *local = tx->local; - struct ieee80211_hw_mode *mode = tx->u.tx.mode; - - erp = txrate->flags & IEEE80211_RATE_ERP; - - /* - * data and mgmt (except PS Poll): - * - during CFP: 32768 - * - during contention period: - * if addr1 is group address: 0 - * if more fragments = 0 and addr1 is individual address: time to - * transmit one ACK plus SIFS - * if more fragments = 1 and addr1 is individual address: time to - * transmit next fragment plus 2 x ACK plus 3 x SIFS - * - * IEEE 802.11, 9.6: - * - control response frame (CTS or ACK) shall be transmitted using the - * same rate as the immediately previous frame in the frame exchange - * sequence, if this rate belongs to the PHY mandatory rates, or else - * at the highest possible rate belonging to the PHY rates in the - * BSSBasicRateSet - */ - - if ((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL) { - /* TODO: These control frames are not currently sent by - * 80211.o, but should they be implemented, this function - * needs to be updated to support duration field calculation. 
- * - * RTS: time needed to transmit pending data/mgmt frame plus - * one CTS frame plus one ACK frame plus 3 x SIFS - * CTS: duration of immediately previous RTS minus time - * required to transmit CTS and its SIFS - * ACK: 0 if immediately previous directed data/mgmt had - * more=0, with more=1 duration in ACK frame is duration - * from previous frame minus time needed to transmit ACK - * and its SIFS - * PS Poll: BIT(15) | BIT(14) | aid - */ - return 0; - } - - /* data/mgmt */ - if (0 /* FIX: data/mgmt during CFP */) - return 32768; - - if (group_addr) /* Group address as the destination - no ACK */ - return 0; - - /* Individual destination address: - * IEEE 802.11, Ch. 9.6 (after IEEE 802.11g changes) - * CTS and ACK frames shall be transmitted using the highest rate in - * basic rate set that is less than or equal to the rate of the - * immediately previous frame and that is using the same modulation - * (CCK or OFDM). If no basic rate set matches with these requirements, - * the highest mandatory rate of the PHY that is less than or equal to - * the rate of the previous frame is used. 
- * Mandatory rates for IEEE 802.11g PHY: 1, 2, 5.5, 11, 6, 12, 24 Mbps - */ - rate = -1; - mrate = 10; /* use 1 Mbps if everything fails */ - for (i = 0; i < mode->num_rates; i++) { - struct ieee80211_rate *r = &mode->rates[i]; - if (r->rate > txrate->rate) - break; - - if (IEEE80211_RATE_MODULATION(txrate->flags) != - IEEE80211_RATE_MODULATION(r->flags)) - continue; - - if (r->flags & IEEE80211_RATE_BASIC) - rate = r->rate; - else if (r->flags & IEEE80211_RATE_MANDATORY) - mrate = r->rate; - } - if (rate == -1) { - /* No matching basic rate found; use highest suitable mandatory - * PHY rate */ - rate = mrate; - } - - /* Time needed to transmit ACK - * (10 bytes + 4-byte FCS = 112 bits) plus SIFS; rounded up - * to closest integer */ - - dur = ieee80211_frame_duration(local, 10, rate, erp, - local->short_preamble); - - if (next_frag_len) { - /* Frame is fragmented: duration increases with time needed to - * transmit next fragment plus ACK and 2 x SIFS. */ - dur *= 2; /* ACK + SIFS */ - /* next fragment */ - dur += ieee80211_frame_duration(local, next_frag_len, - txrate->rate, erp, - local->short_preamble); - } - - return dur; -} - - -static ieee80211_txrx_result -ieee80211_tx_h_misc(struct ieee80211_txrx_data *tx) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; - u16 dur; - struct ieee80211_tx_control *control = tx->u.tx.control; - struct ieee80211_hw_mode *mode = tx->u.tx.mode; - - if (!is_multicast_ether_addr(hdr->addr1)) { - if (tx->skb->len + FCS_LEN > tx->local->rts_threshold && - tx->local->rts_threshold < IEEE80211_MAX_RTS_THRESHOLD) { - control->flags |= IEEE80211_TXCTL_USE_RTS_CTS; - control->retry_limit = - tx->local->long_retry_limit; - } else { - control->retry_limit = - tx->local->short_retry_limit; - } - } else { - control->retry_limit = 1; - } - - if (tx->fragmented) { - /* Do not use multiple retry rates when sending fragmented - * frames. - * TODO: The last fragment could still use multiple retry - * rates. 
*/ - control->alt_retry_rate = -1; - } - - /* Use CTS protection for unicast frames sent using extended rates if - * there are associated non-ERP stations and RTS/CTS is not configured - * for the frame. */ - if (mode->mode == MODE_IEEE80211G && - (tx->u.tx.rate->flags & IEEE80211_RATE_ERP) && - tx->u.tx.unicast && tx->sdata->use_protection && - !(control->flags & IEEE80211_TXCTL_USE_RTS_CTS)) - control->flags |= IEEE80211_TXCTL_USE_CTS_PROTECT; - - /* Setup duration field for the first fragment of the frame. Duration - * for remaining fragments will be updated when they are being sent - * to low-level driver in ieee80211_tx(). */ - dur = ieee80211_duration(tx, is_multicast_ether_addr(hdr->addr1), - tx->fragmented ? tx->u.tx.extra_frag[0]->len : - 0); - hdr->duration_id = cpu_to_le16(dur); - - if ((control->flags & IEEE80211_TXCTL_USE_RTS_CTS) || - (control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT)) { - struct ieee80211_rate *rate; - - /* Do not use multiple retry rates when using RTS/CTS */ - control->alt_retry_rate = -1; - - /* Use min(data rate, max base rate) as CTS/RTS rate */ - rate = tx->u.tx.rate; - while (rate > mode->rates && - !(rate->flags & IEEE80211_RATE_BASIC)) - rate--; - - control->rts_cts_rate = rate->val; - control->rts_rate = rate; - } - - if (tx->sta) { - tx->sta->tx_packets++; - tx->sta->tx_fragments++; - tx->sta->tx_bytes += tx->skb->len; - if (tx->u.tx.extra_frag) { - int i; - tx->sta->tx_fragments += tx->u.tx.num_extra_frag; - for (i = 0; i < tx->u.tx.num_extra_frag; i++) { - tx->sta->tx_bytes += - tx->u.tx.extra_frag[i]->len; - } - } - } - - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_tx_h_check_assoc(struct ieee80211_txrx_data *tx) -{ -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - struct sk_buff *skb = tx->skb; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - u32 sta_flags; - - if (unlikely(tx->local->sta_scanning != 0) && - ((tx->fc & IEEE80211_FCTL_FTYPE) 
!= IEEE80211_FTYPE_MGMT || - (tx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PROBE_REQ)) - return TXRX_DROP; - - if (tx->u.tx.ps_buffered) - return TXRX_CONTINUE; - - sta_flags = tx->sta ? tx->sta->flags : 0; - - if (likely(tx->u.tx.unicast)) { - if (unlikely(!(sta_flags & WLAN_STA_ASSOC) && - tx->sdata->type != IEEE80211_IF_TYPE_IBSS && - (tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA)) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: dropped data frame to not " - "associated station " MAC_FMT "\n", - tx->dev->name, MAC_ARG(hdr->addr1)); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - I802_DEBUG_INC(tx->local->tx_handlers_drop_not_assoc); - return TXRX_DROP; - } - } else { - if (unlikely((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && - tx->local->num_sta == 0 && - !tx->local->allow_broadcast_always && - tx->sdata->type != IEEE80211_IF_TYPE_IBSS)) { - /* - * No associated STAs - no need to send multicast - * frames. - */ - return TXRX_DROP; - } - return TXRX_CONTINUE; - } - - if (unlikely(!tx->u.tx.mgmt_interface && tx->sdata->ieee802_1x && - !(sta_flags & WLAN_STA_AUTHORIZED))) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: dropped frame to " MAC_FMT - " (unauthorized port)\n", tx->dev->name, - MAC_ARG(hdr->addr1)); -#endif - I802_DEBUG_INC(tx->local->tx_handlers_drop_unauth_port); - return TXRX_DROP; - } - - return TXRX_CONTINUE; -} - -static ieee80211_txrx_result -ieee80211_tx_h_sequence(struct ieee80211_txrx_data *tx) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; - - if (ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control)) >= 24) - ieee80211_include_sequence(tx->sdata, hdr); - - return TXRX_CONTINUE; -} - -/* This function is called whenever the AP is about to exceed the maximum limit - * of buffered frames for power saving STAs. 
This situation should not really - * happen often during normal operation, so dropping the oldest buffered packet - * from each queue should be OK to make some room for new frames. */ -static void purge_old_ps_buffers(struct ieee80211_local *local) -{ - int total = 0, purged = 0; - struct sk_buff *skb; - struct ieee80211_sub_if_data *sdata; - struct sta_info *sta; - - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) { - struct ieee80211_if_ap *ap; - if (sdata->dev == local->mdev || - sdata->type != IEEE80211_IF_TYPE_AP) - continue; - ap = &sdata->u.ap; - skb = skb_dequeue(&ap->ps_bc_buf); - if (skb) { - purged++; - dev_kfree_skb(skb); - } - total += skb_queue_len(&ap->ps_bc_buf); - } - read_unlock(&local->sub_if_lock); - - spin_lock_bh(&local->sta_lock); - list_for_each_entry(sta, &local->sta_list, list) { - skb = skb_dequeue(&sta->ps_tx_buf); - if (skb) { - purged++; - dev_kfree_skb(skb); - } - total += skb_queue_len(&sta->ps_tx_buf); - } - spin_unlock_bh(&local->sta_lock); - - local->total_ps_buffered = total; - printk(KERN_DEBUG "%s: PS buffers full - purged %d frames\n", - local->mdev->name, purged); -} - - -static inline ieee80211_txrx_result -ieee80211_tx_h_multicast_ps_buf(struct ieee80211_txrx_data *tx) -{ - /* broadcast/multicast frame */ - /* If any of the associated stations is in power save mode, - * the frame is buffered to be sent after DTIM beacon frame */ - if ((tx->local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING) && - tx->sdata->type != IEEE80211_IF_TYPE_WDS && - tx->sdata->bss && atomic_read(&tx->sdata->bss->num_sta_ps) && - !(tx->fc & IEEE80211_FCTL_ORDER)) { - if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) - purge_old_ps_buffers(tx->local); - if (skb_queue_len(&tx->sdata->bss->ps_bc_buf) >= - AP_MAX_BC_BUFFER) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: BC TX buffer full - " - "dropping the oldest frame\n", - tx->dev->name); - } - 
dev_kfree_skb(skb_dequeue(&tx->sdata->bss->ps_bc_buf)); - } else - tx->local->total_ps_buffered++; - skb_queue_tail(&tx->sdata->bss->ps_bc_buf, tx->skb); - return TXRX_QUEUED; - } - - return TXRX_CONTINUE; -} - - -static inline ieee80211_txrx_result -ieee80211_tx_h_unicast_ps_buf(struct ieee80211_txrx_data *tx) -{ - struct sta_info *sta = tx->sta; - - if (unlikely(!sta || - ((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT && - (tx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PROBE_RESP))) - return TXRX_CONTINUE; - - if (unlikely((sta->flags & WLAN_STA_PS) && !sta->pspoll)) { - struct ieee80211_tx_packet_data *pkt_data; -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "STA " MAC_FMT " aid %d: PS buffer (entries " - "before %d)\n", - MAC_ARG(sta->addr), sta->aid, - skb_queue_len(&sta->ps_tx_buf)); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - sta->flags |= WLAN_STA_TIM; - if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) - purge_old_ps_buffers(tx->local); - if (skb_queue_len(&sta->ps_tx_buf) >= STA_MAX_TX_BUFFER) { - struct sk_buff *old = skb_dequeue(&sta->ps_tx_buf); - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: STA " MAC_FMT " TX " - "buffer full - dropping oldest frame\n", - tx->dev->name, MAC_ARG(sta->addr)); - } - dev_kfree_skb(old); - } else - tx->local->total_ps_buffered++; - /* Queue frame to be sent after STA sends an PS Poll frame */ - if (skb_queue_empty(&sta->ps_tx_buf)) { - if (tx->local->ops->set_tim) - tx->local->ops->set_tim(local_to_hw(tx->local), - sta->aid, 1); - if (tx->sdata->bss) - bss_tim_set(tx->local, tx->sdata->bss, sta->aid); - } - pkt_data = (struct ieee80211_tx_packet_data *)tx->skb->cb; - pkt_data->jiffies = jiffies; - skb_queue_tail(&sta->ps_tx_buf, tx->skb); - return TXRX_QUEUED; - } -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - else if (unlikely(sta->flags & WLAN_STA_PS)) { - printk(KERN_DEBUG "%s: STA " MAC_FMT " in PS mode, but pspoll " - "set -> send frame\n", tx->dev->name, - MAC_ARG(sta->addr)); - 
} -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - sta->pspoll = 0; - - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_tx_h_ps_buf(struct ieee80211_txrx_data *tx) -{ - if (unlikely(tx->u.tx.ps_buffered)) - return TXRX_CONTINUE; - - if (tx->u.tx.unicast) - return ieee80211_tx_h_unicast_ps_buf(tx); - else - return ieee80211_tx_h_multicast_ps_buf(tx); -} - - -/* - * deal with packet injection down monitor interface - * with Radiotap Header -- only called for monitor mode interface - */ - -static ieee80211_txrx_result -__ieee80211_parse_tx_radiotap( - struct ieee80211_txrx_data *tx, - struct sk_buff *skb, struct ieee80211_tx_control *control) -{ - /* - * this is the moment to interpret and discard the radiotap header that - * must be at the start of the packet injected in Monitor mode - * - * Need to take some care with endian-ness since radiotap - * args are little-endian - */ - - struct ieee80211_radiotap_iterator iterator; - struct ieee80211_radiotap_header *rthdr = - (struct ieee80211_radiotap_header *) skb->data; - struct ieee80211_hw_mode *mode = tx->local->hw.conf.mode; - int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len); - - /* - * default control situation for all injected packets - * FIXME: this does not suit all usage cases, expand to allow control - */ - - control->retry_limit = 1; /* no retry */ - control->key_idx = -1; /* no encryption key */ - control->flags &= ~(IEEE80211_TXCTL_USE_RTS_CTS | - IEEE80211_TXCTL_USE_CTS_PROTECT); - control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT | - IEEE80211_TXCTL_NO_ACK; - control->antenna_sel_tx = 0; /* default to default antenna */ - - /* - * for every radiotap entry that is present - * (ieee80211_radiotap_iterator_next returns -ENOENT when no more - * entries present, or -EINVAL on error) - */ - - while (!ret) { - int i, target_rate; - - ret = ieee80211_radiotap_iterator_next(&iterator); - - if (ret) - continue; - - /* see if this argument is something we can use */ - 
switch (iterator.this_arg_index) { - /* - * You must take care when dereferencing iterator.this_arg - * for multibyte types... the pointer is not aligned. Use - * get_unaligned((type *)iterator.this_arg) to dereference - * iterator.this_arg for type "type" safely on all arches. - */ - case IEEE80211_RADIOTAP_RATE: - /* - * radiotap rate u8 is in 500kbps units eg, 0x02=1Mbps - * ieee80211 rate int is in 100kbps units eg, 0x0a=1Mbps - */ - target_rate = (*iterator.this_arg) * 5; - for (i = 0; i < mode->num_rates; i++) { - struct ieee80211_rate *r = &mode->rates[i]; - - if (r->rate > target_rate) - continue; - - control->rate = r; - - if (r->flags & IEEE80211_RATE_PREAMBLE2) - control->tx_rate = r->val2; - else - control->tx_rate = r->val; - - /* end on exact match */ - if (r->rate == target_rate) - i = mode->num_rates; - } - break; - - case IEEE80211_RADIOTAP_ANTENNA: - /* - * radiotap uses 0 for 1st ant, mac80211 is 1 for - * 1st ant - */ - control->antenna_sel_tx = (*iterator.this_arg) + 1; - break; - - case IEEE80211_RADIOTAP_DBM_TX_POWER: - control->power_level = *iterator.this_arg; - break; - - case IEEE80211_RADIOTAP_FLAGS: - if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FCS) { - /* - * this indicates that the skb we have been - * handed has the 32-bit FCS CRC at the end... 
- * we should react to that by snipping it off - * because it will be recomputed and added - * on transmission - */ - if (skb->len < (iterator.max_length + FCS_LEN)) - return TXRX_DROP; - - skb_trim(skb, skb->len - FCS_LEN); - } - break; - - default: - break; - } - } - - if (ret != -ENOENT) /* ie, if we didn't simply run out of fields */ - return TXRX_DROP; - - /* - * remove the radiotap header - * iterator->max_length was sanity-checked against - * skb->len by iterator init - */ - skb_pull(skb, iterator.max_length); - - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result inline -__ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, - struct sk_buff *skb, - struct net_device *dev, - struct ieee80211_tx_control *control) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_sub_if_data *sdata; - ieee80211_txrx_result res = TXRX_CONTINUE; - - int hdrlen; - - memset(tx, 0, sizeof(*tx)); - tx->skb = skb; - tx->dev = dev; /* use original interface */ - tx->local = local; - tx->sdata = IEEE80211_DEV_TO_SUB_IF(dev); - tx->sta = sta_info_get(local, hdr->addr1); - tx->fc = le16_to_cpu(hdr->frame_control); - - /* - * set defaults for things that can be set by - * injected radiotap headers - */ - control->power_level = local->hw.conf.power_level; - control->antenna_sel_tx = local->hw.conf.antenna_sel_tx; - if (local->sta_antenna_sel != STA_ANTENNA_SEL_AUTO && tx->sta) - control->antenna_sel_tx = tx->sta->antenna_sel_tx; - - /* process and remove the injection radiotap header */ - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (unlikely(sdata->type == IEEE80211_IF_TYPE_MNTR)) { - if (__ieee80211_parse_tx_radiotap(tx, skb, control) == - TXRX_DROP) { - return TXRX_DROP; - } - /* - * we removed the radiotap header after this point, - * we filled control with what we could use - * set to the actual ieee header now - */ - hdr = (struct ieee80211_hdr *) skb->data; - res = 
TXRX_QUEUED; /* indication it was monitor packet */ - } - - tx->u.tx.control = control; - tx->u.tx.unicast = !is_multicast_ether_addr(hdr->addr1); - if (is_multicast_ether_addr(hdr->addr1)) - control->flags |= IEEE80211_TXCTL_NO_ACK; - else - control->flags &= ~IEEE80211_TXCTL_NO_ACK; - tx->fragmented = local->fragmentation_threshold < - IEEE80211_MAX_FRAG_THRESHOLD && tx->u.tx.unicast && - skb->len + FCS_LEN > local->fragmentation_threshold && - (!local->ops->set_frag_threshold); - if (!tx->sta) - control->flags |= IEEE80211_TXCTL_CLEAR_DST_MASK; - else if (tx->sta->clear_dst_mask) { - control->flags |= IEEE80211_TXCTL_CLEAR_DST_MASK; - tx->sta->clear_dst_mask = 0; - } - hdrlen = ieee80211_get_hdrlen(tx->fc); - if (skb->len > hdrlen + sizeof(rfc1042_header) + 2) { - u8 *pos = &skb->data[hdrlen + sizeof(rfc1042_header)]; - tx->ethertype = (pos[0] << 8) | pos[1]; - } - control->flags |= IEEE80211_TXCTL_FIRST_FRAGMENT; - - return res; -} - -static int inline is_ieee80211_device(struct net_device *dev, - struct net_device *master) -{ - return (wdev_priv(dev->ieee80211_ptr) == - wdev_priv(master->ieee80211_ptr)); -} - -/* Device in tx->dev has a reference added; use dev_put(tx->dev) when - * finished with it. 
*/ -static int inline ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, - struct sk_buff *skb, - struct net_device *mdev, - struct ieee80211_tx_control *control) -{ - struct ieee80211_tx_packet_data *pkt_data; - struct net_device *dev; - - pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; - dev = dev_get_by_index(pkt_data->ifindex); - if (unlikely(dev && !is_ieee80211_device(dev, mdev))) { - dev_put(dev); - dev = NULL; - } - if (unlikely(!dev)) - return -ENODEV; - __ieee80211_tx_prepare(tx, skb, dev, control); - return 0; -} - -static inline int __ieee80211_queue_stopped(const struct ieee80211_local *local, - int queue) -{ - return test_bit(IEEE80211_LINK_STATE_XOFF, &local->state[queue]); -} - -static inline int __ieee80211_queue_pending(const struct ieee80211_local *local, - int queue) -{ - return test_bit(IEEE80211_LINK_STATE_PENDING, &local->state[queue]); -} - -#define IEEE80211_TX_OK 0 -#define IEEE80211_TX_AGAIN 1 -#define IEEE80211_TX_FRAG_AGAIN 2 - -static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb, - struct ieee80211_txrx_data *tx) -{ - struct ieee80211_tx_control *control = tx->u.tx.control; - int ret, i; - - if (!ieee80211_qdisc_installed(local->mdev) && - __ieee80211_queue_stopped(local, 0)) { - netif_stop_queue(local->mdev); - return IEEE80211_TX_AGAIN; - } - if (skb) { - ieee80211_dump_frame(local->mdev->name, "TX to low-level driver", skb); - ret = local->ops->tx(local_to_hw(local), skb, control); - if (ret) - return IEEE80211_TX_AGAIN; - local->mdev->trans_start = jiffies; - ieee80211_led_tx(local, 1); - } - if (tx->u.tx.extra_frag) { - control->flags &= ~(IEEE80211_TXCTL_USE_RTS_CTS | - IEEE80211_TXCTL_USE_CTS_PROTECT | - IEEE80211_TXCTL_CLEAR_DST_MASK | - IEEE80211_TXCTL_FIRST_FRAGMENT); - for (i = 0; i < tx->u.tx.num_extra_frag; i++) { - if (!tx->u.tx.extra_frag[i]) - continue; - if (__ieee80211_queue_stopped(local, control->queue)) - return IEEE80211_TX_FRAG_AGAIN; - if (i == tx->u.tx.num_extra_frag) { - 
control->tx_rate = tx->u.tx.last_frag_hwrate; - control->rate = tx->u.tx.last_frag_rate; - if (tx->u.tx.probe_last_frag) - control->flags |= - IEEE80211_TXCTL_RATE_CTRL_PROBE; - else - control->flags &= - ~IEEE80211_TXCTL_RATE_CTRL_PROBE; - } - - ieee80211_dump_frame(local->mdev->name, - "TX to low-level driver", - tx->u.tx.extra_frag[i]); - ret = local->ops->tx(local_to_hw(local), - tx->u.tx.extra_frag[i], - control); - if (ret) - return IEEE80211_TX_FRAG_AGAIN; - local->mdev->trans_start = jiffies; - ieee80211_led_tx(local, 1); - tx->u.tx.extra_frag[i] = NULL; - } - kfree(tx->u.tx.extra_frag); - tx->u.tx.extra_frag = NULL; - } - return IEEE80211_TX_OK; -} - -static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb, - struct ieee80211_tx_control *control, int mgmt) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sta_info *sta; - ieee80211_tx_handler *handler; - struct ieee80211_txrx_data tx; - ieee80211_txrx_result res = TXRX_DROP, res_prepare; - int ret, i; - - WARN_ON(__ieee80211_queue_pending(local, control->queue)); - - if (unlikely(skb->len < 10)) { - dev_kfree_skb(skb); - return 0; - } - - res_prepare = __ieee80211_tx_prepare(&tx, skb, dev, control); - - if (res_prepare == TXRX_DROP) { - dev_kfree_skb(skb); - return 0; - } - - sta = tx.sta; - tx.u.tx.mgmt_interface = mgmt; - tx.u.tx.mode = local->hw.conf.mode; - - if (res_prepare == TXRX_QUEUED) { /* if it was an injected packet */ - res = TXRX_CONTINUE; - } else { - for (handler = local->tx_handlers; *handler != NULL; - handler++) { - res = (*handler)(&tx); - if (res != TXRX_CONTINUE) - break; - } - } - - skb = tx.skb; /* handlers are allowed to change skb */ - - if (sta) - sta_info_put(sta); - - if (unlikely(res == TXRX_DROP)) { - I802_DEBUG_INC(local->tx_handlers_drop); - goto drop; - } - - if (unlikely(res == TXRX_QUEUED)) { - I802_DEBUG_INC(local->tx_handlers_queued); - return 0; - } - - if (tx.u.tx.extra_frag) { - for (i = 0; i < tx.u.tx.num_extra_frag; i++) { - 
int next_len, dur; - struct ieee80211_hdr *hdr = - (struct ieee80211_hdr *) - tx.u.tx.extra_frag[i]->data; - - if (i + 1 < tx.u.tx.num_extra_frag) { - next_len = tx.u.tx.extra_frag[i + 1]->len; - } else { - next_len = 0; - tx.u.tx.rate = tx.u.tx.last_frag_rate; - tx.u.tx.last_frag_hwrate = tx.u.tx.rate->val; - } - dur = ieee80211_duration(&tx, 0, next_len); - hdr->duration_id = cpu_to_le16(dur); - } - } - -retry: - ret = __ieee80211_tx(local, skb, &tx); - if (ret) { - struct ieee80211_tx_stored_packet *store = - &local->pending_packet[control->queue]; - - if (ret == IEEE80211_TX_FRAG_AGAIN) - skb = NULL; - set_bit(IEEE80211_LINK_STATE_PENDING, - &local->state[control->queue]); - smp_mb(); - /* When the driver gets out of buffers during sending of - * fragments and calls ieee80211_stop_queue, there is - * a small window between IEEE80211_LINK_STATE_XOFF and - * IEEE80211_LINK_STATE_PENDING flags are set. If a buffer - * gets available in that window (i.e. driver calls - * ieee80211_wake_queue), we would end up with ieee80211_tx - * called with IEEE80211_LINK_STATE_PENDING. Prevent this by - * continuing transmitting here when that situation is - * possible to have happened. 
*/ - if (!__ieee80211_queue_stopped(local, control->queue)) { - clear_bit(IEEE80211_LINK_STATE_PENDING, - &local->state[control->queue]); - goto retry; - } - memcpy(&store->control, control, - sizeof(struct ieee80211_tx_control)); - store->skb = skb; - store->extra_frag = tx.u.tx.extra_frag; - store->num_extra_frag = tx.u.tx.num_extra_frag; - store->last_frag_hwrate = tx.u.tx.last_frag_hwrate; - store->last_frag_rate = tx.u.tx.last_frag_rate; - store->last_frag_rate_ctrl_probe = tx.u.tx.probe_last_frag; - } - return 0; - - drop: - if (skb) - dev_kfree_skb(skb); - for (i = 0; i < tx.u.tx.num_extra_frag; i++) - if (tx.u.tx.extra_frag[i]) - dev_kfree_skb(tx.u.tx.extra_frag[i]); - kfree(tx.u.tx.extra_frag); - return 0; -} - -static void ieee80211_tx_pending(unsigned long data) -{ - struct ieee80211_local *local = (struct ieee80211_local *)data; - struct net_device *dev = local->mdev; - struct ieee80211_tx_stored_packet *store; - struct ieee80211_txrx_data tx; - int i, ret, reschedule = 0; - - netif_tx_lock_bh(dev); - for (i = 0; i < local->hw.queues; i++) { - if (__ieee80211_queue_stopped(local, i)) - continue; - if (!__ieee80211_queue_pending(local, i)) { - reschedule = 1; - continue; - } - store = &local->pending_packet[i]; - tx.u.tx.control = &store->control; - tx.u.tx.extra_frag = store->extra_frag; - tx.u.tx.num_extra_frag = store->num_extra_frag; - tx.u.tx.last_frag_hwrate = store->last_frag_hwrate; - tx.u.tx.last_frag_rate = store->last_frag_rate; - tx.u.tx.probe_last_frag = store->last_frag_rate_ctrl_probe; - ret = __ieee80211_tx(local, store->skb, &tx); - if (ret) { - if (ret == IEEE80211_TX_FRAG_AGAIN) - store->skb = NULL; - } else { - clear_bit(IEEE80211_LINK_STATE_PENDING, - &local->state[i]); - reschedule = 1; - } - } - netif_tx_unlock_bh(dev); - if (reschedule) { - if (!ieee80211_qdisc_installed(dev)) { - if (!__ieee80211_queue_stopped(local, 0)) - netif_wake_queue(dev); - } else - netif_schedule(dev); - } -} - -static void 
ieee80211_clear_tx_pending(struct ieee80211_local *local) -{ - int i, j; - struct ieee80211_tx_stored_packet *store; - - for (i = 0; i < local->hw.queues; i++) { - if (!__ieee80211_queue_pending(local, i)) - continue; - store = &local->pending_packet[i]; - kfree_skb(store->skb); - for (j = 0; j < store->num_extra_frag; j++) - kfree_skb(store->extra_frag[j]); - kfree(store->extra_frag); - clear_bit(IEEE80211_LINK_STATE_PENDING, &local->state[i]); - } -} - -static int ieee80211_master_start_xmit(struct sk_buff *skb, - struct net_device *dev) -{ - struct ieee80211_tx_control control; - struct ieee80211_tx_packet_data *pkt_data; - struct net_device *odev = NULL; - struct ieee80211_sub_if_data *osdata; - int headroom; - int ret; - - /* - * copy control out of the skb so other people can use skb->cb - */ - pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; - memset(&control, 0, sizeof(struct ieee80211_tx_control)); - - if (pkt_data->ifindex) - odev = dev_get_by_index(pkt_data->ifindex); - if (unlikely(odev && !is_ieee80211_device(odev, dev))) { - dev_put(odev); - odev = NULL; - } - if (unlikely(!odev)) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: Discarded packet with nonexistent " - "originating device\n", dev->name); -#endif - dev_kfree_skb(skb); - return 0; - } - osdata = IEEE80211_DEV_TO_SUB_IF(odev); - - headroom = osdata->local->tx_headroom + IEEE80211_ENCRYPT_HEADROOM; - if (skb_headroom(skb) < headroom) { - if (pskb_expand_head(skb, headroom, 0, GFP_ATOMIC)) { - dev_kfree_skb(skb); - dev_put(odev); - return 0; - } - } - - control.ifindex = odev->ifindex; - control.type = osdata->type; - if (pkt_data->req_tx_status) - control.flags |= IEEE80211_TXCTL_REQ_TX_STATUS; - if (pkt_data->do_not_encrypt) - control.flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT; - if (pkt_data->requeue) - control.flags |= IEEE80211_TXCTL_REQUEUE; - control.queue = pkt_data->queue; - - ret = ieee80211_tx(odev, skb, &control, - control.type == IEEE80211_IF_TYPE_MGMT); - 
dev_put(odev); - - return ret; -} - - -int ieee80211_monitor_start_xmit(struct sk_buff *skb, - struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_tx_packet_data *pkt_data; - struct ieee80211_radiotap_header *prthdr = - (struct ieee80211_radiotap_header *)skb->data; - u16 len; - - /* - * there must be a radiotap header at the - * start in this case - */ - if (unlikely(prthdr->it_version)) { - /* only version 0 is supported */ - dev_kfree_skb(skb); - return NETDEV_TX_OK; - } - - skb->dev = local->mdev; - - pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; - memset(pkt_data, 0, sizeof(*pkt_data)); - pkt_data->ifindex = dev->ifindex; - pkt_data->mgmt_iface = 0; - pkt_data->do_not_encrypt = 1; - - /* above needed because we set skb device to master */ - - /* - * fix up the pointers accounting for the radiotap - * header still being in there. We are being given - * a precooked IEEE80211 header so no need for - * normal processing - */ - len = le16_to_cpu(get_unaligned(&prthdr->it_len)); - skb_set_mac_header(skb, len); - skb_set_network_header(skb, len + sizeof(struct ieee80211_hdr)); - skb_set_transport_header(skb, len + sizeof(struct ieee80211_hdr)); - - /* - * pass the radiotap header up to - * the next stage intact - */ - dev_queue_xmit(skb); - - return NETDEV_TX_OK; -} - - -/** - * ieee80211_subif_start_xmit - netif start_xmit function for Ethernet-type - * subinterfaces (wlan#, WDS, and VLAN interfaces) - * @skb: packet to be sent - * @dev: incoming interface - * - * Returns: 0 on success (and frees skb in this case) or 1 on failure (skb will - * not be freed, and caller is responsible for either retrying later or freeing - * skb). - * - * This function takes in an Ethernet header and encapsulates it with suitable - * IEEE 802.11 header based on which interface the packet is coming in. 
The - * encapsulated packet will then be passed to master interface, wlan#.11, for - * transmission (through low-level driver). - */ -int ieee80211_subif_start_xmit(struct sk_buff *skb, - struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_tx_packet_data *pkt_data; - struct ieee80211_sub_if_data *sdata; - int ret = 1, head_need; - u16 ethertype, hdrlen, fc; - struct ieee80211_hdr hdr; - const u8 *encaps_data; - int encaps_len, skip_header_bytes; - int nh_pos, h_pos, no_encrypt = 0; - struct sta_info *sta; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (unlikely(skb->len < ETH_HLEN)) { - printk(KERN_DEBUG "%s: short skb (len=%d)\n", - dev->name, skb->len); - ret = 0; - goto fail; - } - - nh_pos = skb_network_header(skb) - skb->data; - h_pos = skb_transport_header(skb) - skb->data; - - /* convert Ethernet header to proper 802.11 header (based on - * operation mode) */ - ethertype = (skb->data[12] << 8) | skb->data[13]; - /* TODO: handling for 802.1x authorized/unauthorized port */ - fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA; - - if (likely(sdata->type == IEEE80211_IF_TYPE_AP || - sdata->type == IEEE80211_IF_TYPE_VLAN)) { - fc |= IEEE80211_FCTL_FROMDS; - /* DA BSSID SA */ - memcpy(hdr.addr1, skb->data, ETH_ALEN); - memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); - memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN); - hdrlen = 24; - } else if (sdata->type == IEEE80211_IF_TYPE_WDS) { - fc |= IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS; - /* RA TA DA SA */ - memcpy(hdr.addr1, sdata->u.wds.remote_addr, ETH_ALEN); - memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); - memcpy(hdr.addr3, skb->data, ETH_ALEN); - memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); - hdrlen = 30; - } else if (sdata->type == IEEE80211_IF_TYPE_STA) { - fc |= IEEE80211_FCTL_TODS; - /* BSSID SA DA */ - memcpy(hdr.addr1, sdata->u.sta.bssid, ETH_ALEN); - memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); - memcpy(hdr.addr3, skb->data, ETH_ALEN); - 
hdrlen = 24; - } else if (sdata->type == IEEE80211_IF_TYPE_IBSS) { - /* DA SA BSSID */ - memcpy(hdr.addr1, skb->data, ETH_ALEN); - memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); - memcpy(hdr.addr3, sdata->u.sta.bssid, ETH_ALEN); - hdrlen = 24; - } else { - ret = 0; - goto fail; - } - - /* receiver is QoS enabled, use a QoS type frame */ - sta = sta_info_get(local, hdr.addr1); - if (sta) { - if (sta->flags & WLAN_STA_WME) { - fc |= IEEE80211_STYPE_QOS_DATA; - hdrlen += 2; - } - sta_info_put(sta); - } - - hdr.frame_control = cpu_to_le16(fc); - hdr.duration_id = 0; - hdr.seq_ctrl = 0; - - skip_header_bytes = ETH_HLEN; - if (ethertype == ETH_P_AARP || ethertype == ETH_P_IPX) { - encaps_data = bridge_tunnel_header; - encaps_len = sizeof(bridge_tunnel_header); - skip_header_bytes -= 2; - } else if (ethertype >= 0x600) { - encaps_data = rfc1042_header; - encaps_len = sizeof(rfc1042_header); - skip_header_bytes -= 2; - } else { - encaps_data = NULL; - encaps_len = 0; - } - - skb_pull(skb, skip_header_bytes); - nh_pos -= skip_header_bytes; - h_pos -= skip_header_bytes; - - /* TODO: implement support for fragments so that there is no need to - * reallocate and copy payload; it might be enough to support one - * extra fragment that would be copied in the beginning of the frame - * data.. anyway, it would be nice to include this into skb structure - * somehow - * - * There are few options for this: - * use skb->cb as an extra space for 802.11 header - * allocate new buffer if not enough headroom - * make sure that there is enough headroom in every skb by increasing - * build in headroom in __dev_alloc_skb() (linux/skbuff.h) and - * alloc_skb() (net/core/skbuff.c) - */ - head_need = hdrlen + encaps_len + local->tx_headroom; - head_need -= skb_headroom(skb); - - /* We are going to modify skb data, so make a copy of it if happens to - * be cloned. This could happen, e.g., with Linux bridge code passing - * us broadcast frames. 
*/ - - if (head_need > 0 || skb_cloned(skb)) { -#if 0 - printk(KERN_DEBUG "%s: need to reallocate buffer for %d bytes " - "of headroom\n", dev->name, head_need); -#endif - - if (skb_cloned(skb)) - I802_DEBUG_INC(local->tx_expand_skb_head_cloned); - else - I802_DEBUG_INC(local->tx_expand_skb_head); - /* Since we have to reallocate the buffer, make sure that there - * is enough room for possible WEP IV/ICV and TKIP (8 bytes - * before payload and 12 after). */ - if (pskb_expand_head(skb, (head_need > 0 ? head_need + 8 : 8), - 12, GFP_ATOMIC)) { - printk(KERN_DEBUG "%s: failed to reallocate TX buffer" - "\n", dev->name); - goto fail; - } - } - - if (encaps_data) { - memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len); - nh_pos += encaps_len; - h_pos += encaps_len; - } - memcpy(skb_push(skb, hdrlen), &hdr, hdrlen); - nh_pos += hdrlen; - h_pos += hdrlen; - - pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; - memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data)); - pkt_data->ifindex = dev->ifindex; - pkt_data->mgmt_iface = (sdata->type == IEEE80211_IF_TYPE_MGMT); - pkt_data->do_not_encrypt = no_encrypt; - - skb->dev = local->mdev; - sdata->stats.tx_packets++; - sdata->stats.tx_bytes += skb->len; - - /* Update skb pointers to various headers since this modified frame - * is going to go through Linux networking code that may potentially - * need things like pointer to IP header. 
*/ - skb_set_mac_header(skb, 0); - skb_set_network_header(skb, nh_pos); - skb_set_transport_header(skb, h_pos); - - dev->trans_start = jiffies; - dev_queue_xmit(skb); - - return 0; - - fail: - if (!ret) - dev_kfree_skb(skb); - - return ret; -} - - -/* - * This is the transmit routine for the 802.11 type interfaces - * called by upper layers of the linux networking - * stack when it has a frame to transmit - */ -static int -ieee80211_mgmt_start_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct ieee80211_sub_if_data *sdata; - struct ieee80211_tx_packet_data *pkt_data; - struct ieee80211_hdr *hdr; - u16 fc; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - if (skb->len < 10) { - dev_kfree_skb(skb); - return 0; - } - - if (skb_headroom(skb) < sdata->local->tx_headroom) { - if (pskb_expand_head(skb, sdata->local->tx_headroom, - 0, GFP_ATOMIC)) { - dev_kfree_skb(skb); - return 0; - } - } - - hdr = (struct ieee80211_hdr *) skb->data; - fc = le16_to_cpu(hdr->frame_control); - - pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; - memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data)); - pkt_data->ifindex = sdata->dev->ifindex; - pkt_data->mgmt_iface = (sdata->type == IEEE80211_IF_TYPE_MGMT); - - skb->priority = 20; /* use hardcoded priority for mgmt TX queue */ - skb->dev = sdata->local->mdev; - - /* - * We're using the protocol field of the the frame control header - * to request TX callback for hostapd. BIT(1) is checked. 
- */ - if ((fc & BIT(1)) == BIT(1)) { - pkt_data->req_tx_status = 1; - fc &= ~BIT(1); - hdr->frame_control = cpu_to_le16(fc); - } - - pkt_data->do_not_encrypt = !(fc & IEEE80211_FCTL_PROTECTED); - - sdata->stats.tx_packets++; - sdata->stats.tx_bytes += skb->len; - - dev_queue_xmit(skb); - - return 0; -} - - -static void ieee80211_beacon_add_tim(struct ieee80211_local *local, - struct ieee80211_if_ap *bss, - struct sk_buff *skb) -{ - u8 *pos, *tim; - int aid0 = 0; - int i, have_bits = 0, n1, n2; - - /* Generate bitmap for TIM only if there are any STAs in power save - * mode. */ - spin_lock_bh(&local->sta_lock); - if (atomic_read(&bss->num_sta_ps) > 0) - /* in the hope that this is faster than - * checking byte-for-byte */ - have_bits = !bitmap_empty((unsigned long*)bss->tim, - IEEE80211_MAX_AID+1); - - if (bss->dtim_count == 0) - bss->dtim_count = bss->dtim_period - 1; - else - bss->dtim_count--; - - tim = pos = (u8 *) skb_put(skb, 6); - *pos++ = WLAN_EID_TIM; - *pos++ = 4; - *pos++ = bss->dtim_count; - *pos++ = bss->dtim_period; - - if (bss->dtim_count == 0 && !skb_queue_empty(&bss->ps_bc_buf)) - aid0 = 1; - - if (have_bits) { - /* Find largest even number N1 so that bits numbered 1 through - * (N1 x 8) - 1 in the bitmap are 0 and number N2 so that bits - * (N2 + 1) x 8 through 2007 are 0. 
*/ - n1 = 0; - for (i = 0; i < IEEE80211_MAX_TIM_LEN; i++) { - if (bss->tim[i]) { - n1 = i & 0xfe; - break; - } - } - n2 = n1; - for (i = IEEE80211_MAX_TIM_LEN - 1; i >= n1; i--) { - if (bss->tim[i]) { - n2 = i; - break; - } - } - - /* Bitmap control */ - *pos++ = n1 | aid0; - /* Part Virt Bitmap */ - memcpy(pos, bss->tim + n1, n2 - n1 + 1); - - tim[1] = n2 - n1 + 4; - skb_put(skb, n2 - n1); - } else { - *pos++ = aid0; /* Bitmap control */ - *pos++ = 0; /* Part Virt Bitmap */ - } - spin_unlock_bh(&local->sta_lock); -} - - -struct sk_buff * ieee80211_beacon_get(struct ieee80211_hw *hw, int if_id, - struct ieee80211_tx_control *control) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct sk_buff *skb; - struct net_device *bdev; - struct ieee80211_sub_if_data *sdata = NULL; - struct ieee80211_if_ap *ap = NULL; - struct ieee80211_rate *rate; - struct rate_control_extra extra; - u8 *b_head, *b_tail; - int bh_len, bt_len; - - bdev = dev_get_by_index(if_id); - if (bdev) { - sdata = IEEE80211_DEV_TO_SUB_IF(bdev); - ap = &sdata->u.ap; - dev_put(bdev); - } - - if (!ap || sdata->type != IEEE80211_IF_TYPE_AP || - !ap->beacon_head) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "no beacon data avail for idx=%d " - "(%s)\n", if_id, bdev ? 
bdev->name : "N/A"); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - return NULL; - } - - /* Assume we are generating the normal beacon locally */ - b_head = ap->beacon_head; - b_tail = ap->beacon_tail; - bh_len = ap->beacon_head_len; - bt_len = ap->beacon_tail_len; - - skb = dev_alloc_skb(local->tx_headroom + - bh_len + bt_len + 256 /* maximum TIM len */); - if (!skb) - return NULL; - - skb_reserve(skb, local->tx_headroom); - memcpy(skb_put(skb, bh_len), b_head, bh_len); - - ieee80211_include_sequence(sdata, (struct ieee80211_hdr *)skb->data); - - ieee80211_beacon_add_tim(local, ap, skb); - - if (b_tail) { - memcpy(skb_put(skb, bt_len), b_tail, bt_len); - } - - if (control) { - memset(&extra, 0, sizeof(extra)); - extra.mode = local->oper_hw_mode; - - rate = rate_control_get_rate(local, local->mdev, skb, &extra); - if (!rate) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: ieee80211_beacon_get: no rate " - "found\n", local->mdev->name); - } - dev_kfree_skb(skb); - return NULL; - } - - control->tx_rate = (local->short_preamble && - (rate->flags & IEEE80211_RATE_PREAMBLE2)) ? 
- rate->val2 : rate->val; - control->antenna_sel_tx = local->hw.conf.antenna_sel_tx; - control->power_level = local->hw.conf.power_level; - control->flags |= IEEE80211_TXCTL_NO_ACK; - control->retry_limit = 1; - control->flags |= IEEE80211_TXCTL_CLEAR_DST_MASK; - } - - ap->num_beacons++; - return skb; -} -EXPORT_SYMBOL(ieee80211_beacon_get); - -__le16 ieee80211_rts_duration(struct ieee80211_hw *hw, - size_t frame_len, - const struct ieee80211_tx_control *frame_txctl) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_rate *rate; - int short_preamble = local->short_preamble; - int erp; - u16 dur; - - rate = frame_txctl->rts_rate; - erp = !!(rate->flags & IEEE80211_RATE_ERP); - - /* CTS duration */ - dur = ieee80211_frame_duration(local, 10, rate->rate, - erp, short_preamble); - /* Data frame duration */ - dur += ieee80211_frame_duration(local, frame_len, rate->rate, - erp, short_preamble); - /* ACK duration */ - dur += ieee80211_frame_duration(local, 10, rate->rate, - erp, short_preamble); - - return cpu_to_le16(dur); -} -EXPORT_SYMBOL(ieee80211_rts_duration); - - -__le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, - size_t frame_len, - const struct ieee80211_tx_control *frame_txctl) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_rate *rate; - int short_preamble = local->short_preamble; - int erp; - u16 dur; - - rate = frame_txctl->rts_rate; - erp = !!(rate->flags & IEEE80211_RATE_ERP); - - /* Data frame duration */ - dur = ieee80211_frame_duration(local, frame_len, rate->rate, - erp, short_preamble); - if (!(frame_txctl->flags & IEEE80211_TXCTL_NO_ACK)) { - /* ACK duration */ - dur += ieee80211_frame_duration(local, 10, rate->rate, - erp, short_preamble); - } - - return cpu_to_le16(dur); -} -EXPORT_SYMBOL(ieee80211_ctstoself_duration); - -void ieee80211_rts_get(struct ieee80211_hw *hw, - const void *frame, size_t frame_len, - const struct ieee80211_tx_control *frame_txctl, - struct ieee80211_rts *rts) -{ - 
const struct ieee80211_hdr *hdr = frame; - u16 fctl; - - fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS; - rts->frame_control = cpu_to_le16(fctl); - rts->duration = ieee80211_rts_duration(hw, frame_len, frame_txctl); - memcpy(rts->ra, hdr->addr1, sizeof(rts->ra)); - memcpy(rts->ta, hdr->addr2, sizeof(rts->ta)); -} -EXPORT_SYMBOL(ieee80211_rts_get); - -void ieee80211_ctstoself_get(struct ieee80211_hw *hw, - const void *frame, size_t frame_len, - const struct ieee80211_tx_control *frame_txctl, - struct ieee80211_cts *cts) -{ - const struct ieee80211_hdr *hdr = frame; - u16 fctl; - - fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTS; - cts->frame_control = cpu_to_le16(fctl); - cts->duration = ieee80211_ctstoself_duration(hw, frame_len, frame_txctl); - memcpy(cts->ra, hdr->addr1, sizeof(cts->ra)); -} -EXPORT_SYMBOL(ieee80211_ctstoself_get); - -struct sk_buff * -ieee80211_get_buffered_bc(struct ieee80211_hw *hw, int if_id, - struct ieee80211_tx_control *control) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct sk_buff *skb; - struct sta_info *sta; - ieee80211_tx_handler *handler; - struct ieee80211_txrx_data tx; - ieee80211_txrx_result res = TXRX_DROP; - struct net_device *bdev; - struct ieee80211_sub_if_data *sdata; - struct ieee80211_if_ap *bss = NULL; - - bdev = dev_get_by_index(if_id); - if (bdev) { - sdata = IEEE80211_DEV_TO_SUB_IF(bdev); - bss = &sdata->u.ap; - dev_put(bdev); - } - if (!bss || sdata->type != IEEE80211_IF_TYPE_AP || !bss->beacon_head) - return NULL; - - if (bss->dtim_count != 0) - return NULL; /* send buffered bc/mc only after DTIM beacon */ - memset(control, 0, sizeof(*control)); - while (1) { - skb = skb_dequeue(&bss->ps_bc_buf); - if (!skb) - return NULL; - local->total_ps_buffered--; - - if (!skb_queue_empty(&bss->ps_bc_buf) && skb->len >= 2) { - struct ieee80211_hdr *hdr = - (struct ieee80211_hdr *) skb->data; - /* more buffered multicast/broadcast frames ==> set - * MoreData flag in IEEE 802.11 header to inform PS - * STAs */ - 
hdr->frame_control |= - cpu_to_le16(IEEE80211_FCTL_MOREDATA); - } - - if (ieee80211_tx_prepare(&tx, skb, local->mdev, control) == 0) - break; - dev_kfree_skb_any(skb); - } - sta = tx.sta; - tx.u.tx.ps_buffered = 1; - - for (handler = local->tx_handlers; *handler != NULL; handler++) { - res = (*handler)(&tx); - if (res == TXRX_DROP || res == TXRX_QUEUED) - break; - } - dev_put(tx.dev); - skb = tx.skb; /* handlers are allowed to change skb */ - - if (res == TXRX_DROP) { - I802_DEBUG_INC(local->tx_handlers_drop); - dev_kfree_skb(skb); - skb = NULL; - } else if (res == TXRX_QUEUED) { - I802_DEBUG_INC(local->tx_handlers_queued); - skb = NULL; - } - - if (sta) - sta_info_put(sta); - - return skb; -} -EXPORT_SYMBOL(ieee80211_get_buffered_bc); - -static int __ieee80211_if_config(struct net_device *dev, - struct sk_buff *beacon, - struct ieee80211_tx_control *control) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_if_conf conf; - static u8 scan_bssid[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; - - if (!local->ops->config_interface || !netif_running(dev)) - return 0; - - memset(&conf, 0, sizeof(conf)); - conf.type = sdata->type; - if (sdata->type == IEEE80211_IF_TYPE_STA || - sdata->type == IEEE80211_IF_TYPE_IBSS) { - if (local->sta_scanning && - local->scan_dev == dev) - conf.bssid = scan_bssid; - else - conf.bssid = sdata->u.sta.bssid; - conf.ssid = sdata->u.sta.ssid; - conf.ssid_len = sdata->u.sta.ssid_len; - conf.generic_elem = sdata->u.sta.extra_ie; - conf.generic_elem_len = sdata->u.sta.extra_ie_len; - } else if (sdata->type == IEEE80211_IF_TYPE_AP) { - conf.ssid = sdata->u.ap.ssid; - conf.ssid_len = sdata->u.ap.ssid_len; - conf.generic_elem = sdata->u.ap.generic_elem; - conf.generic_elem_len = sdata->u.ap.generic_elem_len; - conf.beacon = beacon; - conf.beacon_control = control; - } - return local->ops->config_interface(local_to_hw(local), - dev->ifindex, 
&conf); -} - -int ieee80211_if_config(struct net_device *dev) -{ - return __ieee80211_if_config(dev, NULL, NULL); -} - -int ieee80211_if_config_beacon(struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_tx_control control; - struct sk_buff *skb; - - if (!(local->hw.flags & IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE)) - return 0; - skb = ieee80211_beacon_get(local_to_hw(local), dev->ifindex, &control); - if (!skb) - return -ENOMEM; - return __ieee80211_if_config(dev, skb, &control); -} - -int ieee80211_hw_config(struct ieee80211_local *local) -{ - struct ieee80211_hw_mode *mode; - struct ieee80211_channel *chan; - int ret = 0; - - if (local->sta_scanning) { - chan = local->scan_channel; - mode = local->scan_hw_mode; - } else { - chan = local->oper_channel; - mode = local->oper_hw_mode; - } - - local->hw.conf.channel = chan->chan; - local->hw.conf.channel_val = chan->val; - local->hw.conf.power_level = chan->power_level; - local->hw.conf.freq = chan->freq; - local->hw.conf.phymode = mode->mode; - local->hw.conf.antenna_max = chan->antenna_max; - local->hw.conf.chan = chan; - local->hw.conf.mode = mode; - -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "HW CONFIG: channel=%d freq=%d " - "phymode=%d\n", local->hw.conf.channel, local->hw.conf.freq, - local->hw.conf.phymode); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - - if (local->ops->config) - ret = local->ops->config(local_to_hw(local), &local->hw.conf); - - return ret; -} - - -static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) -{ - /* FIX: what would be proper limits for MTU? - * This interface uses 802.3 frames. 
*/ - if (new_mtu < 256 || new_mtu > IEEE80211_MAX_DATA_LEN - 24 - 6) { - printk(KERN_WARNING "%s: invalid MTU %d\n", - dev->name, new_mtu); - return -EINVAL; - } - -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: setting MTU %d\n", dev->name, new_mtu); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - dev->mtu = new_mtu; - return 0; -} - - -static int ieee80211_change_mtu_apdev(struct net_device *dev, int new_mtu) -{ - /* FIX: what would be proper limits for MTU? - * This interface uses 802.11 frames. */ - if (new_mtu < 256 || new_mtu > IEEE80211_MAX_DATA_LEN) { - printk(KERN_WARNING "%s: invalid MTU %d\n", - dev->name, new_mtu); - return -EINVAL; - } - -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: setting MTU %d\n", dev->name, new_mtu); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - dev->mtu = new_mtu; - return 0; -} - -enum netif_tx_lock_class { - TX_LOCK_NORMAL, - TX_LOCK_MASTER, -}; - -static inline void netif_tx_lock_nested(struct net_device *dev, int subclass) -{ - spin_lock_nested(&dev->_xmit_lock, subclass); - dev->xmit_lock_owner = smp_processor_id(); -} - -static void ieee80211_set_multicast_list(struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - unsigned short flags; - - netif_tx_lock_nested(local->mdev, TX_LOCK_MASTER); - if (((dev->flags & IFF_ALLMULTI) != 0) ^ (sdata->allmulti != 0)) { - if (sdata->allmulti) { - sdata->allmulti = 0; - local->iff_allmultis--; - } else { - sdata->allmulti = 1; - local->iff_allmultis++; - } - } - if (((dev->flags & IFF_PROMISC) != 0) ^ (sdata->promisc != 0)) { - if (sdata->promisc) { - sdata->promisc = 0; - local->iff_promiscs--; - } else { - sdata->promisc = 1; - local->iff_promiscs++; - } - } - if (dev->mc_count != sdata->mc_count) { - local->mc_count = local->mc_count - sdata->mc_count + - dev->mc_count; - sdata->mc_count = dev->mc_count; - } - if (local->ops->set_multicast_list) { 
- flags = local->mdev->flags; - if (local->iff_allmultis) - flags |= IFF_ALLMULTI; - if (local->iff_promiscs) - flags |= IFF_PROMISC; - read_lock(&local->sub_if_lock); - local->ops->set_multicast_list(local_to_hw(local), flags, - local->mc_count); - read_unlock(&local->sub_if_lock); - } - netif_tx_unlock(local->mdev); -} - -struct dev_mc_list *ieee80211_get_mc_list_item(struct ieee80211_hw *hw, - struct dev_mc_list *prev, - void **ptr) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_sub_if_data *sdata = *ptr; - struct dev_mc_list *mc; - - if (!prev) { - WARN_ON(sdata); - sdata = NULL; - } - if (!prev || !prev->next) { - if (sdata) - sdata = list_entry(sdata->list.next, - struct ieee80211_sub_if_data, list); - else - sdata = list_entry(local->sub_if_list.next, - struct ieee80211_sub_if_data, list); - if (&sdata->list != &local->sub_if_list) - mc = sdata->dev->mc_list; - else - mc = NULL; - } else - mc = prev->next; - - *ptr = sdata; - return mc; -} -EXPORT_SYMBOL(ieee80211_get_mc_list_item); - -static struct net_device_stats *ieee80211_get_stats(struct net_device *dev) -{ - struct ieee80211_sub_if_data *sdata; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - return &(sdata->stats); -} - -static void ieee80211_if_shutdown(struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - ASSERT_RTNL(); - switch (sdata->type) { - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: - sdata->u.sta.state = IEEE80211_DISABLED; - del_timer_sync(&sdata->u.sta.timer); - skb_queue_purge(&sdata->u.sta.skb_queue); - if (!local->ops->hw_scan && - local->scan_dev == sdata->dev) { - local->sta_scanning = 0; - cancel_delayed_work(&local->scan_work); - } - flush_workqueue(local->hw.workqueue); - break; - } -} - -static inline int identical_mac_addr_allowed(int type1, int type2) -{ - return (type1 == IEEE80211_IF_TYPE_MNTR || - type2 == IEEE80211_IF_TYPE_MNTR || 
- (type1 == IEEE80211_IF_TYPE_AP && - type2 == IEEE80211_IF_TYPE_WDS) || - (type1 == IEEE80211_IF_TYPE_WDS && - (type2 == IEEE80211_IF_TYPE_WDS || - type2 == IEEE80211_IF_TYPE_AP)) || - (type1 == IEEE80211_IF_TYPE_AP && - type2 == IEEE80211_IF_TYPE_VLAN) || - (type1 == IEEE80211_IF_TYPE_VLAN && - (type2 == IEEE80211_IF_TYPE_AP || - type2 == IEEE80211_IF_TYPE_VLAN))); -} - -static int ieee80211_master_open(struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata; - int res = -EOPNOTSUPP; - - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) { - if (sdata->dev != dev && netif_running(sdata->dev)) { - res = 0; - break; - } - } - read_unlock(&local->sub_if_lock); - return res; -} - -static int ieee80211_master_stop(struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata; - - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) - if (sdata->dev != dev && netif_running(sdata->dev)) - dev_close(sdata->dev); - read_unlock(&local->sub_if_lock); - - return 0; -} - -static int ieee80211_mgmt_open(struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - - if (!netif_running(local->mdev)) - return -EOPNOTSUPP; - return 0; -} - -static int ieee80211_mgmt_stop(struct net_device *dev) -{ - return 0; -} - -/* Check if running monitor interfaces should go to a "soft monitor" mode - * and switch them if necessary. 
*/ -static inline void ieee80211_start_soft_monitor(struct ieee80211_local *local) -{ - struct ieee80211_if_init_conf conf; - - if (local->open_count && local->open_count == local->monitors && - !(local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER) && - local->ops->remove_interface) { - conf.if_id = -1; - conf.type = IEEE80211_IF_TYPE_MNTR; - conf.mac_addr = NULL; - local->ops->remove_interface(local_to_hw(local), &conf); - } -} - -/* Check if running monitor interfaces should go to a "hard monitor" mode - * and switch them if necessary. */ -static void ieee80211_start_hard_monitor(struct ieee80211_local *local) -{ - struct ieee80211_if_init_conf conf; - - if (local->open_count && local->open_count == local->monitors && - !(local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER)) { - conf.if_id = -1; - conf.type = IEEE80211_IF_TYPE_MNTR; - conf.mac_addr = NULL; - local->ops->add_interface(local_to_hw(local), &conf); - } -} - -static int ieee80211_open(struct net_device *dev) -{ - struct ieee80211_sub_if_data *sdata, *nsdata; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_if_init_conf conf; - int res; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - read_lock(&local->sub_if_lock); - list_for_each_entry(nsdata, &local->sub_if_list, list) { - struct net_device *ndev = nsdata->dev; - - if (ndev != dev && ndev != local->mdev && netif_running(ndev) && - compare_ether_addr(dev->dev_addr, ndev->dev_addr) == 0 && - !identical_mac_addr_allowed(sdata->type, nsdata->type)) { - read_unlock(&local->sub_if_lock); - return -ENOTUNIQ; - } - } - read_unlock(&local->sub_if_lock); - - if (sdata->type == IEEE80211_IF_TYPE_WDS && - is_zero_ether_addr(sdata->u.wds.remote_addr)) - return -ENOLINK; - - if (sdata->type == IEEE80211_IF_TYPE_MNTR && local->open_count && - !(local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER)) { - /* run the interface in a "soft monitor" mode */ - local->monitors++; - local->open_count++; - local->hw.conf.flags |= 
IEEE80211_CONF_RADIOTAP; - return 0; - } - ieee80211_start_soft_monitor(local); - - conf.if_id = dev->ifindex; - conf.type = sdata->type; - conf.mac_addr = dev->dev_addr; - res = local->ops->add_interface(local_to_hw(local), &conf); - if (res) { - if (sdata->type == IEEE80211_IF_TYPE_MNTR) - ieee80211_start_hard_monitor(local); - return res; - } - - if (local->open_count == 0) { - res = 0; - tasklet_enable(&local->tx_pending_tasklet); - tasklet_enable(&local->tasklet); - if (local->ops->open) - res = local->ops->open(local_to_hw(local)); - if (res == 0) { - res = dev_open(local->mdev); - if (res) { - if (local->ops->stop) - local->ops->stop(local_to_hw(local)); - } else { - res = ieee80211_hw_config(local); - if (res && local->ops->stop) - local->ops->stop(local_to_hw(local)); - else if (!res && local->apdev) - dev_open(local->apdev); - } - } - if (res) { - if (local->ops->remove_interface) - local->ops->remove_interface(local_to_hw(local), - &conf); - return res; - } - } - local->open_count++; - - if (sdata->type == IEEE80211_IF_TYPE_MNTR) { - local->monitors++; - local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP; - } else - ieee80211_if_config(dev); - - if (sdata->type == IEEE80211_IF_TYPE_STA && - !local->user_space_mlme) - netif_carrier_off(dev); - else - netif_carrier_on(dev); - - netif_start_queue(dev); - return 0; -} - - -static int ieee80211_stop(struct net_device *dev) -{ - struct ieee80211_sub_if_data *sdata; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - if (sdata->type == IEEE80211_IF_TYPE_MNTR && - local->open_count > 1 && - !(local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER)) { - /* remove "soft monitor" interface */ - local->open_count--; - local->monitors--; - if (!local->monitors) - local->hw.conf.flags &= ~IEEE80211_CONF_RADIOTAP; - return 0; - } - - netif_stop_queue(dev); - ieee80211_if_shutdown(dev); - - if (sdata->type == IEEE80211_IF_TYPE_MNTR) { - local->monitors--; - if 
(!local->monitors) - local->hw.conf.flags &= ~IEEE80211_CONF_RADIOTAP; - } - - local->open_count--; - if (local->open_count == 0) { - if (netif_running(local->mdev)) - dev_close(local->mdev); - if (local->apdev) - dev_close(local->apdev); - if (local->ops->stop) - local->ops->stop(local_to_hw(local)); - tasklet_disable(&local->tx_pending_tasklet); - tasklet_disable(&local->tasklet); - } - if (local->ops->remove_interface) { - struct ieee80211_if_init_conf conf; - - conf.if_id = dev->ifindex; - conf.type = sdata->type; - conf.mac_addr = dev->dev_addr; - local->ops->remove_interface(local_to_hw(local), &conf); - } - - ieee80211_start_hard_monitor(local); - - return 0; -} - - -static int header_parse_80211(struct sk_buff *skb, unsigned char *haddr) -{ - memcpy(haddr, skb_mac_header(skb) + 10, ETH_ALEN); /* addr2 */ - return ETH_ALEN; -} - -static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr) -{ - return compare_ether_addr(raddr, addr) == 0 || - is_broadcast_ether_addr(raddr); -} - - -static ieee80211_txrx_result -ieee80211_rx_h_data(struct ieee80211_txrx_data *rx) -{ - struct net_device *dev = rx->dev; - struct ieee80211_local *local = rx->local; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; - u16 fc, hdrlen, ethertype; - u8 *payload; - u8 dst[ETH_ALEN]; - u8 src[ETH_ALEN]; - struct sk_buff *skb = rx->skb, *skb2; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - fc = rx->fc; - if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA)) - return TXRX_CONTINUE; - - if (unlikely(!WLAN_FC_DATA_PRESENT(fc))) - return TXRX_DROP; - - hdrlen = ieee80211_get_hdrlen(fc); - - /* convert IEEE 802.11 header + possible LLC headers into Ethernet - * header - * IEEE 802.11 address fields: - * ToDS FromDS Addr1 Addr2 Addr3 Addr4 - * 0 0 DA SA BSSID n/a - * 0 1 DA BSSID SA n/a - * 1 0 BSSID SA DA n/a - * 1 1 RA TA DA SA - */ - - switch (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { - case 
IEEE80211_FCTL_TODS: - /* BSSID SA DA */ - memcpy(dst, hdr->addr3, ETH_ALEN); - memcpy(src, hdr->addr2, ETH_ALEN); - - if (unlikely(sdata->type != IEEE80211_IF_TYPE_AP && - sdata->type != IEEE80211_IF_TYPE_VLAN)) { - printk(KERN_DEBUG "%s: dropped ToDS frame (BSSID=" - MAC_FMT " SA=" MAC_FMT " DA=" MAC_FMT ")\n", - dev->name, MAC_ARG(hdr->addr1), - MAC_ARG(hdr->addr2), MAC_ARG(hdr->addr3)); - return TXRX_DROP; - } - break; - case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): - /* RA TA DA SA */ - memcpy(dst, hdr->addr3, ETH_ALEN); - memcpy(src, hdr->addr4, ETH_ALEN); - - if (unlikely(sdata->type != IEEE80211_IF_TYPE_WDS)) { - printk(KERN_DEBUG "%s: dropped FromDS&ToDS frame (RA=" - MAC_FMT " TA=" MAC_FMT " DA=" MAC_FMT " SA=" - MAC_FMT ")\n", - rx->dev->name, MAC_ARG(hdr->addr1), - MAC_ARG(hdr->addr2), MAC_ARG(hdr->addr3), - MAC_ARG(hdr->addr4)); - return TXRX_DROP; - } - break; - case IEEE80211_FCTL_FROMDS: - /* DA BSSID SA */ - memcpy(dst, hdr->addr1, ETH_ALEN); - memcpy(src, hdr->addr3, ETH_ALEN); - - if (sdata->type != IEEE80211_IF_TYPE_STA) { - return TXRX_DROP; - } - break; - case 0: - /* DA SA BSSID */ - memcpy(dst, hdr->addr1, ETH_ALEN); - memcpy(src, hdr->addr2, ETH_ALEN); - - if (sdata->type != IEEE80211_IF_TYPE_IBSS) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: dropped IBSS frame (DA=" - MAC_FMT " SA=" MAC_FMT " BSSID=" MAC_FMT - ")\n", - dev->name, MAC_ARG(hdr->addr1), - MAC_ARG(hdr->addr2), - MAC_ARG(hdr->addr3)); - } - return TXRX_DROP; - } - break; - } - - payload = skb->data + hdrlen; - - if (unlikely(skb->len - hdrlen < 8)) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: RX too short data frame " - "payload\n", dev->name); - } - return TXRX_DROP; - } - - ethertype = (payload[6] << 8) | payload[7]; - - if (likely((compare_ether_addr(payload, rfc1042_header) == 0 && - ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) || - compare_ether_addr(payload, bridge_tunnel_header) == 0)) { - /* remove RFC1042 or Bridge-Tunnel encapsulation and - * 
replace EtherType */ - skb_pull(skb, hdrlen + 6); - memcpy(skb_push(skb, ETH_ALEN), src, ETH_ALEN); - memcpy(skb_push(skb, ETH_ALEN), dst, ETH_ALEN); - } else { - struct ethhdr *ehdr; - __be16 len; - skb_pull(skb, hdrlen); - len = htons(skb->len); - ehdr = (struct ethhdr *) skb_push(skb, sizeof(struct ethhdr)); - memcpy(ehdr->h_dest, dst, ETH_ALEN); - memcpy(ehdr->h_source, src, ETH_ALEN); - ehdr->h_proto = len; - } - skb->dev = dev; - - skb2 = NULL; - - sdata->stats.rx_packets++; - sdata->stats.rx_bytes += skb->len; - - if (local->bridge_packets && (sdata->type == IEEE80211_IF_TYPE_AP - || sdata->type == IEEE80211_IF_TYPE_VLAN) && rx->u.rx.ra_match) { - if (is_multicast_ether_addr(skb->data)) { - /* send multicast frames both to higher layers in - * local net stack and back to the wireless media */ - skb2 = skb_copy(skb, GFP_ATOMIC); - if (!skb2) - printk(KERN_DEBUG "%s: failed to clone " - "multicast frame\n", dev->name); - } else { - struct sta_info *dsta; - dsta = sta_info_get(local, skb->data); - if (dsta && !dsta->dev) { - printk(KERN_DEBUG "Station with null dev " - "structure!\n"); - } else if (dsta && dsta->dev == dev) { - /* Destination station is associated to this - * AP, so send the frame directly to it and - * do not pass the frame to local net stack. 
- */ - skb2 = skb; - skb = NULL; - } - if (dsta) - sta_info_put(dsta); - } - } - - if (skb) { - /* deliver to local stack */ - skb->protocol = eth_type_trans(skb, dev); - memset(skb->cb, 0, sizeof(skb->cb)); - netif_rx(skb); - } - - if (skb2) { - /* send to wireless media */ - skb2->protocol = __constant_htons(ETH_P_802_3); - skb_set_network_header(skb2, 0); - skb_set_mac_header(skb2, 0); - dev_queue_xmit(skb2); - } - - return TXRX_QUEUED; -} - - -static struct ieee80211_rate * -ieee80211_get_rate(struct ieee80211_local *local, int phymode, int hw_rate) -{ - struct ieee80211_hw_mode *mode; - int r; - - list_for_each_entry(mode, &local->modes_list, list) { - if (mode->mode != phymode) - continue; - for (r = 0; r < mode->num_rates; r++) { - struct ieee80211_rate *rate = &mode->rates[r]; - if (rate->val == hw_rate || - (rate->flags & IEEE80211_RATE_PREAMBLE2 && - rate->val2 == hw_rate)) - return rate; - } + if (local->open_count && local->open_count == local->monitors && + !(local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER) && + local->ops->remove_interface) { + conf.if_id = -1; + conf.type = IEEE80211_IF_TYPE_MNTR; + conf.mac_addr = NULL; + local->ops->remove_interface(local_to_hw(local), &conf); } - - return NULL; } -static void -ieee80211_fill_frame_info(struct ieee80211_local *local, - struct ieee80211_frame_info *fi, - struct ieee80211_rx_status *status) +/* Check if running monitor interfaces should go to a "hard monitor" mode + * and switch them if necessary. 
*/ +static void ieee80211_start_hard_monitor(struct ieee80211_local *local) { - if (status) { - struct timespec ts; - struct ieee80211_rate *rate; - - jiffies_to_timespec(jiffies, &ts); - fi->hosttime = cpu_to_be64((u64) ts.tv_sec * 1000000 + - ts.tv_nsec / 1000); - fi->mactime = cpu_to_be64(status->mactime); - switch (status->phymode) { - case MODE_IEEE80211A: - fi->phytype = htonl(ieee80211_phytype_ofdm_dot11_a); - break; - case MODE_IEEE80211B: - fi->phytype = htonl(ieee80211_phytype_dsss_dot11_b); - break; - case MODE_IEEE80211G: - fi->phytype = htonl(ieee80211_phytype_pbcc_dot11_g); - break; - case MODE_ATHEROS_TURBO: - fi->phytype = - htonl(ieee80211_phytype_dsss_dot11_turbo); - break; - default: - fi->phytype = htonl(0xAAAAAAAA); - break; - } - fi->channel = htonl(status->channel); - rate = ieee80211_get_rate(local, status->phymode, - status->rate); - if (rate) { - fi->datarate = htonl(rate->rate); - if (rate->flags & IEEE80211_RATE_PREAMBLE2) { - if (status->rate == rate->val) - fi->preamble = htonl(2); /* long */ - else if (status->rate == rate->val2) - fi->preamble = htonl(1); /* short */ - } else - fi->preamble = htonl(0); - } else { - fi->datarate = htonl(0); - fi->preamble = htonl(0); - } - - fi->antenna = htonl(status->antenna); - fi->priority = htonl(0xffffffff); /* no clue */ - fi->ssi_type = htonl(ieee80211_ssi_raw); - fi->ssi_signal = htonl(status->ssi); - fi->ssi_noise = 0x00000000; - fi->encoding = 0; - } else { - /* clear everything because we really don't know. 
- * the msg_type field isn't present on monitor frames - * so we don't know whether it will be present or not, - * but it's ok to not clear it since it'll be assigned - * anyway */ - memset(fi, 0, sizeof(*fi) - sizeof(fi->msg_type)); + struct ieee80211_if_init_conf conf; - fi->ssi_type = htonl(ieee80211_ssi_none); + if (local->open_count && local->open_count == local->monitors && + !(local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER)) { + conf.if_id = -1; + conf.type = IEEE80211_IF_TYPE_MNTR; + conf.mac_addr = NULL; + local->ops->add_interface(local_to_hw(local), &conf); } - fi->version = htonl(IEEE80211_FI_VERSION); - fi->length = cpu_to_be32(sizeof(*fi) - sizeof(fi->msg_type)); } -/* this routine is actually not just for this, but also - * for pushing fake 'management' frames into userspace. - * it shall be replaced by a netlink-based system. */ -void -ieee80211_rx_mgmt(struct ieee80211_local *local, struct sk_buff *skb, - struct ieee80211_rx_status *status, u32 msg_type) +static void ieee80211_if_open(struct net_device *dev) { - struct ieee80211_frame_info *fi; - const size_t hlen = sizeof(struct ieee80211_frame_info); - struct ieee80211_sub_if_data *sdata; - - skb->dev = local->apdev; - - sdata = IEEE80211_DEV_TO_SUB_IF(local->apdev); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (skb_headroom(skb) < hlen) { - I802_DEBUG_INC(local->rx_expand_skb_head); - if (pskb_expand_head(skb, hlen, 0, GFP_ATOMIC)) { - dev_kfree_skb(skb); - return; - } + switch (sdata->type) { + case IEEE80211_IF_TYPE_STA: + case IEEE80211_IF_TYPE_IBSS: + sdata->u.sta.flags &= ~IEEE80211_STA_PREV_BSSID_SET; + break; } - - fi = (struct ieee80211_frame_info *) skb_push(skb, hlen); - - ieee80211_fill_frame_info(local, fi, status); - fi->msg_type = htonl(msg_type); - - sdata->stats.rx_packets++; - sdata->stats.rx_bytes += skb->len; - - skb_set_mac_header(skb, 0); - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->pkt_type = PACKET_OTHERHOST; - skb->protocol = 
htons(ETH_P_802_2); - memset(skb->cb, 0, sizeof(skb->cb)); - netif_rx(skb); } -static void -ieee80211_rx_monitor(struct net_device *dev, struct sk_buff *skb, - struct ieee80211_rx_status *status) +static int ieee80211_open(struct net_device *dev) { + struct ieee80211_sub_if_data *sdata, *nsdata; struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata; - struct ieee80211_rate *rate; - struct ieee80211_rtap_hdr { - struct ieee80211_radiotap_header hdr; - u8 flags; - u8 rate; - __le16 chan_freq; - __le16 chan_flags; - u8 antsignal; - } __attribute__ ((packed)) *rthdr; - - skb->dev = dev; + struct ieee80211_if_init_conf conf; + int res; sdata = IEEE80211_DEV_TO_SUB_IF(dev); + read_lock(&local->sub_if_lock); + list_for_each_entry(nsdata, &local->sub_if_list, list) { + struct net_device *ndev = nsdata->dev; - if (status->flag & RX_FLAG_RADIOTAP) - goto out; - - if (skb_headroom(skb) < sizeof(*rthdr)) { - I802_DEBUG_INC(local->rx_expand_skb_head); - if (pskb_expand_head(skb, sizeof(*rthdr), 0, GFP_ATOMIC)) { - dev_kfree_skb(skb); - return; - } - } - - rthdr = (struct ieee80211_rtap_hdr *) skb_push(skb, sizeof(*rthdr)); - memset(rthdr, 0, sizeof(*rthdr)); - rthdr->hdr.it_len = cpu_to_le16(sizeof(*rthdr)); - rthdr->hdr.it_present = - cpu_to_le32((1 << IEEE80211_RADIOTAP_FLAGS) | - (1 << IEEE80211_RADIOTAP_RATE) | - (1 << IEEE80211_RADIOTAP_CHANNEL) | - (1 << IEEE80211_RADIOTAP_DB_ANTSIGNAL)); - rthdr->flags = local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS ? - IEEE80211_RADIOTAP_F_FCS : 0; - rate = ieee80211_get_rate(local, status->phymode, status->rate); - if (rate) - rthdr->rate = rate->rate / 5; - rthdr->chan_freq = cpu_to_le16(status->freq); - rthdr->chan_flags = - status->phymode == MODE_IEEE80211A ? 
- cpu_to_le16(IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ) : - cpu_to_le16(IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ); - rthdr->antsignal = status->ssi; - - out: - sdata->stats.rx_packets++; - sdata->stats.rx_bytes += skb->len; - - skb_set_mac_header(skb, 0); - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->pkt_type = PACKET_OTHERHOST; - skb->protocol = htons(ETH_P_802_2); - memset(skb->cb, 0, sizeof(skb->cb)); - netif_rx(skb); -} - -int ieee80211_radar_status(struct ieee80211_hw *hw, int channel, - int radar, int radar_type) -{ - struct sk_buff *skb; - struct ieee80211_radar_info *msg; - struct ieee80211_local *local = hw_to_local(hw); - - if (!local->apdev) - return 0; - - skb = dev_alloc_skb(sizeof(struct ieee80211_frame_info) + - sizeof(struct ieee80211_radar_info)); - - if (!skb) - return -ENOMEM; - skb_reserve(skb, sizeof(struct ieee80211_frame_info)); - - msg = (struct ieee80211_radar_info *) - skb_put(skb, sizeof(struct ieee80211_radar_info)); - msg->channel = channel; - msg->radar = radar; - msg->radar_type = radar_type; - - ieee80211_rx_mgmt(local, skb, NULL, ieee80211_msg_radar); - return 0; -} -EXPORT_SYMBOL(ieee80211_radar_status); - - -static void ap_sta_ps_start(struct net_device *dev, struct sta_info *sta) -{ - struct ieee80211_sub_if_data *sdata; - sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev); - - if (sdata->bss) - atomic_inc(&sdata->bss->num_sta_ps); - sta->flags |= WLAN_STA_PS; - sta->pspoll = 0; -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "%s: STA " MAC_FMT " aid %d enters power " - "save mode\n", dev->name, MAC_ARG(sta->addr), sta->aid); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ -} - - -static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sk_buff *skb; - int sent = 0; - struct ieee80211_sub_if_data *sdata; - struct ieee80211_tx_packet_data *pkt_data; - - sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev); - if (sdata->bss) - 
atomic_dec(&sdata->bss->num_sta_ps); - sta->flags &= ~(WLAN_STA_PS | WLAN_STA_TIM); - sta->pspoll = 0; - if (!skb_queue_empty(&sta->ps_tx_buf)) { - if (local->ops->set_tim) - local->ops->set_tim(local_to_hw(local), sta->aid, 0); - if (sdata->bss) - bss_tim_clear(local, sdata->bss, sta->aid); - } -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "%s: STA " MAC_FMT " aid %d exits power " - "save mode\n", dev->name, MAC_ARG(sta->addr), sta->aid); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - /* Send all buffered frames to the station */ - while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) { - pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; - sent++; - pkt_data->requeue = 1; - dev_queue_xmit(skb); - } - while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) { - pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; - local->total_ps_buffered--; - sent++; -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "%s: STA " MAC_FMT " aid %d send PS frame " - "since STA not sleeping anymore\n", dev->name, - MAC_ARG(sta->addr), sta->aid); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - pkt_data->requeue = 1; - dev_queue_xmit(skb); - } - - return sent; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_ps_poll(struct ieee80211_txrx_data *rx) -{ - struct sk_buff *skb; - int no_pending_pkts; - - if (likely(!rx->sta || - (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_CTL || - (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PSPOLL || - !rx->u.rx.ra_match)) - return TXRX_CONTINUE; - - skb = skb_dequeue(&rx->sta->tx_filtered); - if (!skb) { - skb = skb_dequeue(&rx->sta->ps_tx_buf); - if (skb) - rx->local->total_ps_buffered--; - } - no_pending_pkts = skb_queue_empty(&rx->sta->tx_filtered) && - skb_queue_empty(&rx->sta->ps_tx_buf); - - if (skb) { - struct ieee80211_hdr *hdr = - (struct ieee80211_hdr *) skb->data; - - /* tell TX path to send one frame even though the STA may - * still remain is PS mode after this frame exchange */ - 
rx->sta->pspoll = 1; - -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - printk(KERN_DEBUG "STA " MAC_FMT " aid %d: PS Poll (entries " - "after %d)\n", - MAC_ARG(rx->sta->addr), rx->sta->aid, - skb_queue_len(&rx->sta->ps_tx_buf)); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - - /* Use MoreData flag to indicate whether there are more - * buffered frames for this STA */ - if (no_pending_pkts) { - hdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREDATA); - rx->sta->flags &= ~WLAN_STA_TIM; - } else - hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA); - - dev_queue_xmit(skb); - - if (no_pending_pkts) { - if (rx->local->ops->set_tim) - rx->local->ops->set_tim(local_to_hw(rx->local), - rx->sta->aid, 0); - if (rx->sdata->bss) - bss_tim_clear(rx->local, rx->sdata->bss, rx->sta->aid); - } -#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG - } else if (!rx->u.rx.sent_ps_buffered) { - printk(KERN_DEBUG "%s: STA " MAC_FMT " sent PS Poll even " - "though there is no buffered frames for it\n", - rx->dev->name, MAC_ARG(rx->sta->addr)); -#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - - } - - /* Free PS Poll skb here instead of returning TXRX_DROP that would - * count as an dropped frame. 
*/ - dev_kfree_skb(rx->skb); - - return TXRX_QUEUED; -} - - -static inline struct ieee80211_fragment_entry * -ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata, - unsigned int frag, unsigned int seq, int rx_queue, - struct sk_buff **skb) -{ - struct ieee80211_fragment_entry *entry; - int idx; - - idx = sdata->fragment_next; - entry = &sdata->fragments[sdata->fragment_next++]; - if (sdata->fragment_next >= IEEE80211_FRAGMENT_MAX) - sdata->fragment_next = 0; - - if (!skb_queue_empty(&entry->skb_list)) { -#ifdef CONFIG_MAC80211_DEBUG - struct ieee80211_hdr *hdr = - (struct ieee80211_hdr *) entry->skb_list.next->data; - printk(KERN_DEBUG "%s: RX reassembly removed oldest " - "fragment entry (idx=%d age=%lu seq=%d last_frag=%d " - "addr1=" MAC_FMT " addr2=" MAC_FMT "\n", - sdata->dev->name, idx, - jiffies - entry->first_frag_time, entry->seq, - entry->last_frag, MAC_ARG(hdr->addr1), - MAC_ARG(hdr->addr2)); -#endif /* CONFIG_MAC80211_DEBUG */ - __skb_queue_purge(&entry->skb_list); - } - - __skb_queue_tail(&entry->skb_list, *skb); /* no need for locking */ - *skb = NULL; - entry->first_frag_time = jiffies; - entry->seq = seq; - entry->rx_queue = rx_queue; - entry->last_frag = frag; - entry->ccmp = 0; - entry->extra_len = 0; - - return entry; -} - - -static inline struct ieee80211_fragment_entry * -ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata, - u16 fc, unsigned int frag, unsigned int seq, - int rx_queue, struct ieee80211_hdr *hdr) -{ - struct ieee80211_fragment_entry *entry; - int i, idx; - - idx = sdata->fragment_next; - for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) { - struct ieee80211_hdr *f_hdr; - u16 f_fc; - - idx--; - if (idx < 0) - idx = IEEE80211_FRAGMENT_MAX - 1; - - entry = &sdata->fragments[idx]; - if (skb_queue_empty(&entry->skb_list) || entry->seq != seq || - entry->rx_queue != rx_queue || - entry->last_frag + 1 != frag) - continue; - - f_hdr = (struct ieee80211_hdr *) entry->skb_list.next->data; - f_fc = 
le16_to_cpu(f_hdr->frame_control); - - if ((fc & IEEE80211_FCTL_FTYPE) != (f_fc & IEEE80211_FCTL_FTYPE) || - compare_ether_addr(hdr->addr1, f_hdr->addr1) != 0 || - compare_ether_addr(hdr->addr2, f_hdr->addr2) != 0) - continue; - - if (entry->first_frag_time + 2 * HZ < jiffies) { - __skb_queue_purge(&entry->skb_list); - continue; - } - return entry; - } - - return NULL; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_defragment(struct ieee80211_txrx_data *rx) -{ - struct ieee80211_hdr *hdr; - u16 sc; - unsigned int frag, seq; - struct ieee80211_fragment_entry *entry; - struct sk_buff *skb; - - hdr = (struct ieee80211_hdr *) rx->skb->data; - sc = le16_to_cpu(hdr->seq_ctrl); - frag = sc & IEEE80211_SCTL_FRAG; - - if (likely((!(rx->fc & IEEE80211_FCTL_MOREFRAGS) && frag == 0) || - (rx->skb)->len < 24 || - is_multicast_ether_addr(hdr->addr1))) { - /* not fragmented */ - goto out; - } - I802_DEBUG_INC(rx->local->rx_handlers_fragments); - - seq = (sc & IEEE80211_SCTL_SEQ) >> 4; - - if (frag == 0) { - /* This is the first fragment of a new frame. */ - entry = ieee80211_reassemble_add(rx->sdata, frag, seq, - rx->u.rx.queue, &(rx->skb)); - if (rx->key && rx->key->alg == ALG_CCMP && - (rx->fc & IEEE80211_FCTL_PROTECTED)) { - /* Store CCMP PN so that we can verify that the next - * fragment has a sequential PN value. */ - entry->ccmp = 1; - memcpy(entry->last_pn, - rx->key->u.ccmp.rx_pn[rx->u.rx.queue], - CCMP_PN_LEN); - } - return TXRX_QUEUED; - } - - /* This is a fragment for a frame that should already be pending in - * fragment cache. Add this fragment to the end of the pending entry. - */ - entry = ieee80211_reassemble_find(rx->sdata, rx->fc, frag, seq, - rx->u.rx.queue, hdr); - if (!entry) { - I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag); - return TXRX_DROP; - } - - /* Verify that MPDUs within one MSDU have sequential PN values. 
- * (IEEE 802.11i, 8.3.3.4.5) */ - if (entry->ccmp) { - int i; - u8 pn[CCMP_PN_LEN], *rpn; - if (!rx->key || rx->key->alg != ALG_CCMP) - return TXRX_DROP; - memcpy(pn, entry->last_pn, CCMP_PN_LEN); - for (i = CCMP_PN_LEN - 1; i >= 0; i--) { - pn[i]++; - if (pn[i]) - break; - } - rpn = rx->key->u.ccmp.rx_pn[rx->u.rx.queue]; - if (memcmp(pn, rpn, CCMP_PN_LEN) != 0) { - printk(KERN_DEBUG "%s: defrag: CCMP PN not sequential" - " A2=" MAC_FMT " PN=%02x%02x%02x%02x%02x%02x " - "(expected %02x%02x%02x%02x%02x%02x)\n", - rx->dev->name, MAC_ARG(hdr->addr2), - rpn[0], rpn[1], rpn[2], rpn[3], rpn[4], rpn[5], - pn[0], pn[1], pn[2], pn[3], pn[4], pn[5]); - return TXRX_DROP; - } - memcpy(entry->last_pn, pn, CCMP_PN_LEN); - } - - skb_pull(rx->skb, ieee80211_get_hdrlen(rx->fc)); - __skb_queue_tail(&entry->skb_list, rx->skb); - entry->last_frag = frag; - entry->extra_len += rx->skb->len; - if (rx->fc & IEEE80211_FCTL_MOREFRAGS) { - rx->skb = NULL; - return TXRX_QUEUED; - } - - rx->skb = __skb_dequeue(&entry->skb_list); - if (skb_tailroom(rx->skb) < entry->extra_len) { - I802_DEBUG_INC(rx->local->rx_expand_skb_head2); - if (unlikely(pskb_expand_head(rx->skb, 0, entry->extra_len, - GFP_ATOMIC))) { - I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag); - __skb_queue_purge(&entry->skb_list); - return TXRX_DROP; - } - } - while ((skb = __skb_dequeue(&entry->skb_list))) { - memcpy(skb_put(rx->skb, skb->len), skb->data, skb->len); - dev_kfree_skb(skb); - } - - /* Complete frame has been reassembled - process it now */ - rx->fragmented = 1; - - out: - if (rx->sta) - rx->sta->rx_packets++; - if (is_multicast_ether_addr(hdr->addr1)) - rx->local->dot11MulticastReceivedFrameCount++; - else - ieee80211_led_rx(rx->local); - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_monitor(struct ieee80211_txrx_data *rx) -{ - if (rx->sdata->type == IEEE80211_IF_TYPE_MNTR) { - ieee80211_rx_monitor(rx->dev, rx->skb, rx->u.rx.status); - return TXRX_QUEUED; - } - - if 
(rx->u.rx.status->flag & RX_FLAG_RADIOTAP) - skb_pull(rx->skb, ieee80211_get_radiotap_len(rx->skb)); - - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_check(struct ieee80211_txrx_data *rx) -{ - struct ieee80211_hdr *hdr; - int always_sta_key; - hdr = (struct ieee80211_hdr *) rx->skb->data; - - /* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.2.9) */ - if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) { - if (unlikely(rx->fc & IEEE80211_FCTL_RETRY && - rx->sta->last_seq_ctrl[rx->u.rx.queue] == - hdr->seq_ctrl)) { - if (rx->u.rx.ra_match) { - rx->local->dot11FrameDuplicateCount++; - rx->sta->num_duplicates++; - } - return TXRX_DROP; - } else - rx->sta->last_seq_ctrl[rx->u.rx.queue] = hdr->seq_ctrl; - } - - if ((rx->local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) && - rx->skb->len > FCS_LEN) - skb_trim(rx->skb, rx->skb->len - FCS_LEN); - - if (unlikely(rx->skb->len < 16)) { - I802_DEBUG_INC(rx->local->rx_handlers_drop_short); - return TXRX_DROP; - } - - if (!rx->u.rx.ra_match) - rx->skb->pkt_type = PACKET_OTHERHOST; - else if (compare_ether_addr(rx->dev->dev_addr, hdr->addr1) == 0) - rx->skb->pkt_type = PACKET_HOST; - else if (is_multicast_ether_addr(hdr->addr1)) { - if (is_broadcast_ether_addr(hdr->addr1)) - rx->skb->pkt_type = PACKET_BROADCAST; - else - rx->skb->pkt_type = PACKET_MULTICAST; - } else - rx->skb->pkt_type = PACKET_OTHERHOST; - - /* Drop disallowed frame classes based on STA auth/assoc state; - * IEEE 802.11, Chap 5.5. - * - * 80211.o does filtering only based on association state, i.e., it - * drops Class 3 frames from not associated stations. hostapd sends - * deauth/disassoc frames when needed. In addition, hostapd is - * responsible for filtering on both auth and assoc states. 
- */ - if (unlikely(((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA || - ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL && - (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL)) && - rx->sdata->type != IEEE80211_IF_TYPE_IBSS && - (!rx->sta || !(rx->sta->flags & WLAN_STA_ASSOC)))) { - if ((!(rx->fc & IEEE80211_FCTL_FROMDS) && - !(rx->fc & IEEE80211_FCTL_TODS) && - (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) - || !rx->u.rx.ra_match) { - /* Drop IBSS frames and frames for other hosts - * silently. */ - return TXRX_DROP; + if (ndev != dev && ndev != local->mdev && netif_running(ndev) && + compare_ether_addr(dev->dev_addr, ndev->dev_addr) == 0 && + !identical_mac_addr_allowed(sdata->type, nsdata->type)) { + read_unlock(&local->sub_if_lock); + return -ENOTUNIQ; } + } + read_unlock(&local->sub_if_lock); - if (!rx->local->apdev) - return TXRX_DROP; + if (sdata->type == IEEE80211_IF_TYPE_WDS && + is_zero_ether_addr(sdata->u.wds.remote_addr)) + return -ENOLINK; - ieee80211_rx_mgmt(rx->local, rx->skb, rx->u.rx.status, - ieee80211_msg_sta_not_assoc); - return TXRX_QUEUED; + if (sdata->type == IEEE80211_IF_TYPE_MNTR && local->open_count && + !(local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER)) { + /* run the interface in a "soft monitor" mode */ + local->monitors++; + local->open_count++; + local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP; + return 0; } + ieee80211_if_open(dev); + ieee80211_start_soft_monitor(local); - if (rx->sdata->type == IEEE80211_IF_TYPE_STA) - always_sta_key = 0; + conf.if_id = dev->ifindex; + conf.type = sdata->type; + if (sdata->type == IEEE80211_IF_TYPE_MNTR) + conf.mac_addr = NULL; else - always_sta_key = 1; - - if (rx->sta && rx->sta->key && always_sta_key) { - rx->key = rx->sta->key; - } else { - if (rx->sta && rx->sta->key) - rx->key = rx->sta->key; - else - rx->key = rx->sdata->default_key; + conf.mac_addr = dev->dev_addr; + res = local->ops->add_interface(local_to_hw(local), &conf); + if (res) { + if 
(sdata->type == IEEE80211_IF_TYPE_MNTR) + ieee80211_start_hard_monitor(local); + return res; + } - if ((rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) && - rx->fc & IEEE80211_FCTL_PROTECTED) { - int keyidx = ieee80211_wep_get_keyidx(rx->skb); - - if (keyidx >= 0 && keyidx < NUM_DEFAULT_KEYS && - (!rx->sta || !rx->sta->key || keyidx > 0)) - rx->key = rx->sdata->keys[keyidx]; - - if (!rx->key) { - if (!rx->u.rx.ra_match) - return TXRX_DROP; - printk(KERN_DEBUG "%s: RX WEP frame with " - "unknown keyidx %d (A1=" MAC_FMT " A2=" - MAC_FMT " A3=" MAC_FMT ")\n", - rx->dev->name, keyidx, - MAC_ARG(hdr->addr1), - MAC_ARG(hdr->addr2), - MAC_ARG(hdr->addr3)); - if (!rx->local->apdev) - return TXRX_DROP; - ieee80211_rx_mgmt( - rx->local, rx->skb, rx->u.rx.status, - ieee80211_msg_wep_frame_unknown_key); - return TXRX_QUEUED; + if (local->open_count == 0) { + res = 0; + tasklet_enable(&local->tx_pending_tasklet); + tasklet_enable(&local->tasklet); + if (local->ops->open) + res = local->ops->open(local_to_hw(local)); + if (res == 0) { + res = dev_open(local->mdev); + if (res) { + if (local->ops->stop) + local->ops->stop(local_to_hw(local)); + } else { + res = ieee80211_hw_config(local); + if (res && local->ops->stop) + local->ops->stop(local_to_hw(local)); + else if (!res && local->apdev) + dev_open(local->apdev); } } + if (res) { + if (local->ops->remove_interface) + local->ops->remove_interface(local_to_hw(local), + &conf); + return res; + } } + local->open_count++; - if (rx->fc & IEEE80211_FCTL_PROTECTED && rx->key && rx->u.rx.ra_match) { - rx->key->tx_rx_count++; - if (unlikely(rx->local->key_tx_rx_threshold && - rx->key->tx_rx_count > - rx->local->key_tx_rx_threshold)) { - ieee80211_key_threshold_notify(rx->dev, rx->key, - rx->sta); - } + if (sdata->type == IEEE80211_IF_TYPE_MNTR) { + local->monitors++; + local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP; + } else { + ieee80211_if_config(dev); + ieee80211_reset_erp_info(dev); + ieee80211_enable_keys(sdata); } - return 
TXRX_CONTINUE; -} + if (sdata->type == IEEE80211_IF_TYPE_STA && + !local->user_space_mlme) + netif_carrier_off(dev); + else + netif_carrier_on(dev); + netif_start_queue(dev); + return 0; +} -static ieee80211_txrx_result -ieee80211_rx_h_sta_process(struct ieee80211_txrx_data *rx) +static void ieee80211_if_shutdown(struct net_device *dev) { - struct sta_info *sta = rx->sta; - struct net_device *dev = rx->dev; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; - - if (!sta) - return TXRX_CONTINUE; + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - /* Update last_rx only for IBSS packets which are for the current - * BSSID to avoid keeping the current IBSS network alive in cases where - * other STAs are using different BSSID. */ - if (rx->sdata->type == IEEE80211_IF_TYPE_IBSS) { - u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len); - if (compare_ether_addr(bssid, rx->sdata->u.sta.bssid) == 0) - sta->last_rx = jiffies; - } else - if (!is_multicast_ether_addr(hdr->addr1) || - rx->sdata->type == IEEE80211_IF_TYPE_STA) { - /* Update last_rx only for unicast frames in order to prevent - * the Probe Request frames (the only broadcast frames from a - * STA in infrastructure mode) from keeping a connection alive. + ASSERT_RTNL(); + switch (sdata->type) { + case IEEE80211_IF_TYPE_STA: + case IEEE80211_IF_TYPE_IBSS: + sdata->u.sta.state = IEEE80211_DISABLED; + del_timer_sync(&sdata->u.sta.timer); + /* + * Holding the sub_if_lock for writing here blocks + * out the receive path and makes sure it's not + * currently processing a packet that may get + * added to the queue. 
*/ - sta->last_rx = jiffies; - } - - if (!rx->u.rx.ra_match) - return TXRX_CONTINUE; + write_lock_bh(&local->sub_if_lock); + skb_queue_purge(&sdata->u.sta.skb_queue); + write_unlock_bh(&local->sub_if_lock); - sta->rx_fragments++; - sta->rx_bytes += rx->skb->len; - sta->last_rssi = (sta->last_rssi * 15 + - rx->u.rx.status->ssi) / 16; - sta->last_signal = (sta->last_signal * 15 + - rx->u.rx.status->signal) / 16; - sta->last_noise = (sta->last_noise * 15 + - rx->u.rx.status->noise) / 16; - - if (!(rx->fc & IEEE80211_FCTL_MOREFRAGS)) { - /* Change STA power saving mode only in the end of a frame - * exchange sequence */ - if ((sta->flags & WLAN_STA_PS) && !(rx->fc & IEEE80211_FCTL_PM)) - rx->u.rx.sent_ps_buffered += ap_sta_ps_end(dev, sta); - else if (!(sta->flags & WLAN_STA_PS) && - (rx->fc & IEEE80211_FCTL_PM)) - ap_sta_ps_start(dev, sta); - } - - /* Drop data::nullfunc frames silently, since they are used only to - * control station power saving mode. */ - if ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && - (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_NULLFUNC) { - I802_DEBUG_INC(rx->local->rx_handlers_drop_nullfunc); - /* Update counter and free packet here to avoid counting this - * as a dropped packed. 
*/ - sta->rx_packets++; - dev_kfree_skb(rx->skb); - return TXRX_QUEUED; - } - - return TXRX_CONTINUE; -} /* ieee80211_rx_h_sta_process */ - - -static ieee80211_txrx_result -ieee80211_rx_h_wep_weak_iv_detection(struct ieee80211_txrx_data *rx) -{ - if (!rx->sta || !(rx->fc & IEEE80211_FCTL_PROTECTED) || - (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA || - !rx->key || rx->key->alg != ALG_WEP || !rx->u.rx.ra_match) - return TXRX_CONTINUE; - - /* Check for weak IVs, if hwaccel did not remove IV from the frame */ - if ((rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) || - rx->key->force_sw_encrypt) { - u8 *iv = ieee80211_wep_is_weak_iv(rx->skb, rx->key); - if (iv) { - rx->sta->wep_weak_iv_count++; + if (!local->ops->hw_scan && + local->scan_dev == sdata->dev) { + local->sta_scanning = 0; + cancel_delayed_work(&local->scan_work); } + flush_workqueue(local->hw.workqueue); + break; } - - return TXRX_CONTINUE; } - -static ieee80211_txrx_result -ieee80211_rx_h_wep_decrypt(struct ieee80211_txrx_data *rx) +static int ieee80211_stop(struct net_device *dev) { - /* If the device handles decryption totally, skip this test */ - if (rx->local->hw.flags & IEEE80211_HW_DEVICE_HIDES_WEP) - return TXRX_CONTINUE; - - if ((rx->key && rx->key->alg != ALG_WEP) || - !(rx->fc & IEEE80211_FCTL_PROTECTED) || - ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA && - ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || - (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH))) - return TXRX_CONTINUE; + struct ieee80211_sub_if_data *sdata; + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - if (!rx->key) { - printk(KERN_DEBUG "%s: RX WEP frame, but no key set\n", - rx->dev->name); - return TXRX_DROP; - } + sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (!(rx->u.rx.status->flag & RX_FLAG_DECRYPTED) || - rx->key->force_sw_encrypt) { - if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key)) { - printk(KERN_DEBUG "%s: RX WEP frame, decrypt " - "failed\n", 
rx->dev->name); - return TXRX_DROP; - } - } else if (rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) { - ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key); - /* remove ICV */ - skb_trim(rx->skb, rx->skb->len - 4); + if (sdata->type == IEEE80211_IF_TYPE_MNTR && + local->open_count > 1 && + !(local->hw.flags & IEEE80211_HW_MONITOR_DURING_OPER)) { + /* remove "soft monitor" interface */ + local->open_count--; + local->monitors--; + if (!local->monitors) + local->hw.conf.flags &= ~IEEE80211_CONF_RADIOTAP; + return 0; } - return TXRX_CONTINUE; -} - + netif_stop_queue(dev); + ieee80211_if_shutdown(dev); -static ieee80211_txrx_result -ieee80211_rx_h_802_1x_pae(struct ieee80211_txrx_data *rx) -{ - if (rx->sdata->eapol && ieee80211_is_eapol(rx->skb) && - rx->sdata->type != IEEE80211_IF_TYPE_STA && rx->u.rx.ra_match) { - /* Pass both encrypted and unencrypted EAPOL frames to user - * space for processing. */ - if (!rx->local->apdev) - return TXRX_DROP; - ieee80211_rx_mgmt(rx->local, rx->skb, rx->u.rx.status, - ieee80211_msg_normal); - return TXRX_QUEUED; + if (sdata->type == IEEE80211_IF_TYPE_MNTR) { + local->monitors--; + if (!local->monitors) + local->hw.conf.flags &= ~IEEE80211_CONF_RADIOTAP; + } else { + /* disable all keys for as long as this netdev is down */ + ieee80211_disable_keys(sdata); } - if (unlikely(rx->sdata->ieee802_1x && - (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && - (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC && - (!rx->sta || !(rx->sta->flags & WLAN_STA_AUTHORIZED)) && - !ieee80211_is_eapol(rx->skb))) { -#ifdef CONFIG_MAC80211_DEBUG - struct ieee80211_hdr *hdr = - (struct ieee80211_hdr *) rx->skb->data; - printk(KERN_DEBUG "%s: dropped frame from " MAC_FMT - " (unauthorized port)\n", rx->dev->name, - MAC_ARG(hdr->addr2)); -#endif /* CONFIG_MAC80211_DEBUG */ - return TXRX_DROP; + local->open_count--; + if (local->open_count == 0) { + if (netif_running(local->mdev)) + dev_close(local->mdev); + if (local->apdev) + 
dev_close(local->apdev); + if (local->ops->stop) + local->ops->stop(local_to_hw(local)); + tasklet_disable(&local->tx_pending_tasklet); + tasklet_disable(&local->tasklet); } + if (local->ops->remove_interface) { + struct ieee80211_if_init_conf conf; - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_drop_unencrypted(struct ieee80211_txrx_data *rx) -{ - /* If the device handles decryption totally, skip this test */ - if (rx->local->hw.flags & IEEE80211_HW_DEVICE_HIDES_WEP) - return TXRX_CONTINUE; - - /* Drop unencrypted frames if key is set. */ - if (unlikely(!(rx->fc & IEEE80211_FCTL_PROTECTED) && - (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && - (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC && - (rx->key || rx->sdata->drop_unencrypted) && - (rx->sdata->eapol == 0 || - !ieee80211_is_eapol(rx->skb)))) { - printk(KERN_DEBUG "%s: RX non-WEP frame, but expected " - "encryption\n", rx->dev->name); - return TXRX_DROP; + conf.if_id = dev->ifindex; + conf.type = sdata->type; + conf.mac_addr = dev->dev_addr; + local->ops->remove_interface(local_to_hw(local), &conf); } - return TXRX_CONTINUE; -} - - -static ieee80211_txrx_result -ieee80211_rx_h_mgmt(struct ieee80211_txrx_data *rx) -{ - struct ieee80211_sub_if_data *sdata; - if (!rx->u.rx.ra_match) - return TXRX_DROP; + ieee80211_start_hard_monitor(local); - sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); - if ((sdata->type == IEEE80211_IF_TYPE_STA || - sdata->type == IEEE80211_IF_TYPE_IBSS) && - !rx->local->user_space_mlme) { - ieee80211_sta_rx_mgmt(rx->dev, rx->skb, rx->u.rx.status); - } else { - /* Management frames are sent to hostapd for processing */ - if (!rx->local->apdev) - return TXRX_DROP; - ieee80211_rx_mgmt(rx->local, rx->skb, rx->u.rx.status, - ieee80211_msg_normal); - } - return TXRX_QUEUED; + return 0; } +enum netif_tx_lock_class { + TX_LOCK_NORMAL, + TX_LOCK_MASTER, +}; -static ieee80211_txrx_result -ieee80211_rx_h_passive_scan(struct ieee80211_txrx_data *rx) 
+static inline void netif_tx_lock_nested(struct net_device *dev, int subclass) { - struct ieee80211_local *local = rx->local; - struct sk_buff *skb = rx->skb; - - if (unlikely(local->sta_scanning != 0)) { - ieee80211_sta_rx_scan(rx->dev, skb, rx->u.rx.status); - return TXRX_QUEUED; - } - - if (unlikely(rx->u.rx.in_scan)) { - /* scanning finished during invoking of handlers */ - I802_DEBUG_INC(local->rx_handlers_drop_passive_scan); - return TXRX_DROP; - } - - return TXRX_CONTINUE; + spin_lock_nested(&dev->_xmit_lock, subclass); + dev->xmit_lock_owner = smp_processor_id(); } - -static void ieee80211_rx_michael_mic_report(struct net_device *dev, - struct ieee80211_hdr *hdr, - struct sta_info *sta, - struct ieee80211_txrx_data *rx) +static void ieee80211_set_multicast_list(struct net_device *dev) { - int keyidx, hdrlen; - - hdrlen = ieee80211_get_hdrlen_from_skb(rx->skb); - if (rx->skb->len >= hdrlen + 4) - keyidx = rx->skb->data[hdrlen + 3] >> 6; - else - keyidx = -1; + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + unsigned short flags; - /* TODO: verify that this is not triggered by fragmented - * frames (hw does not verify MIC for them). */ - printk(KERN_DEBUG "%s: TKIP hwaccel reported Michael MIC " - "failure from " MAC_FMT " to " MAC_FMT " keyidx=%d\n", - dev->name, MAC_ARG(hdr->addr2), MAC_ARG(hdr->addr1), keyidx); - - if (!sta) { - /* Some hardware versions seem to generate incorrect - * Michael MIC reports; ignore them to avoid triggering - * countermeasures. 
*/ - printk(KERN_DEBUG "%s: ignored spurious Michael MIC " - "error for unknown address " MAC_FMT "\n", - dev->name, MAC_ARG(hdr->addr2)); - goto ignore; - } - - if (!(rx->fc & IEEE80211_FCTL_PROTECTED)) { - printk(KERN_DEBUG "%s: ignored spurious Michael MIC " - "error for a frame with no ISWEP flag (src " - MAC_FMT ")\n", dev->name, MAC_ARG(hdr->addr2)); - goto ignore; - } - - if ((rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) && - rx->sdata->type == IEEE80211_IF_TYPE_AP) { - keyidx = ieee80211_wep_get_keyidx(rx->skb); - /* AP with Pairwise keys support should never receive Michael - * MIC errors for non-zero keyidx because these are reserved - * for group keys and only the AP is sending real multicast - * frames in BSS. */ - if (keyidx) { - printk(KERN_DEBUG "%s: ignored Michael MIC error for " - "a frame with non-zero keyidx (%d) (src " MAC_FMT - ")\n", dev->name, keyidx, MAC_ARG(hdr->addr2)); - goto ignore; - } + netif_tx_lock_nested(local->mdev, TX_LOCK_MASTER); + if (((dev->flags & IFF_ALLMULTI) != 0) ^ + ((sdata->flags & IEEE80211_SDATA_ALLMULTI) != 0)) { + if (sdata->flags & IEEE80211_SDATA_ALLMULTI) + local->iff_allmultis--; + else + local->iff_allmultis++; + sdata->flags ^= IEEE80211_SDATA_ALLMULTI; } - - if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA && - ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || - (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH)) { - printk(KERN_DEBUG "%s: ignored spurious Michael MIC " - "error for a frame that cannot be encrypted " - "(fc=0x%04x) (src " MAC_FMT ")\n", - dev->name, rx->fc, MAC_ARG(hdr->addr2)); - goto ignore; + if (((dev->flags & IFF_PROMISC) != 0) ^ + ((sdata->flags & IEEE80211_SDATA_PROMISC) != 0)) { + if (sdata->flags & IEEE80211_SDATA_PROMISC) + local->iff_promiscs--; + else + local->iff_promiscs++; + sdata->flags ^= IEEE80211_SDATA_PROMISC; } - - do { - union iwreq_data wrqu; - char *buf = kmalloc(128, GFP_ATOMIC); - if (!buf) - break; - - /* TODO: needed parameters: 
count, key type, TSC */ - sprintf(buf, "MLME-MICHAELMICFAILURE.indication(" - "keyid=%d %scast addr=" MAC_FMT ")", - keyidx, hdr->addr1[0] & 0x01 ? "broad" : "uni", - MAC_ARG(hdr->addr2)); - memset(&wrqu, 0, sizeof(wrqu)); - wrqu.data.length = strlen(buf); - wireless_send_event(rx->dev, IWEVCUSTOM, &wrqu, buf); - kfree(buf); - } while (0); - - /* TODO: consider verifying the MIC error report with software - * implementation if we get too many spurious reports from the - * hardware. */ - if (!rx->local->apdev) - goto ignore; - ieee80211_rx_mgmt(rx->local, rx->skb, rx->u.rx.status, - ieee80211_msg_michael_mic_failure); - return; - - ignore: - dev_kfree_skb(rx->skb); - rx->skb = NULL; -} - -static inline ieee80211_txrx_result __ieee80211_invoke_rx_handlers( - struct ieee80211_local *local, - ieee80211_rx_handler *handlers, - struct ieee80211_txrx_data *rx, - struct sta_info *sta) -{ - ieee80211_rx_handler *handler; - ieee80211_txrx_result res = TXRX_DROP; - - for (handler = handlers; *handler != NULL; handler++) { - res = (*handler)(rx); - if (res != TXRX_CONTINUE) { - if (res == TXRX_DROP) { - I802_DEBUG_INC(local->rx_handlers_drop); - if (sta) - sta->rx_dropped++; - } - if (res == TXRX_QUEUED) - I802_DEBUG_INC(local->rx_handlers_queued); - break; - } + if (dev->mc_count != sdata->mc_count) { + local->mc_count = local->mc_count - sdata->mc_count + + dev->mc_count; + sdata->mc_count = dev->mc_count; } - - if (res == TXRX_DROP) { - dev_kfree_skb(rx->skb); + if (local->ops->set_multicast_list) { + flags = local->mdev->flags; + if (local->iff_allmultis) + flags |= IFF_ALLMULTI; + if (local->iff_promiscs) + flags |= IFF_PROMISC; + read_lock(&local->sub_if_lock); + local->ops->set_multicast_list(local_to_hw(local), flags, + local->mc_count); + read_unlock(&local->sub_if_lock); } - return res; + netif_tx_unlock(local->mdev); } -static inline void ieee80211_invoke_rx_handlers(struct ieee80211_local *local, - ieee80211_rx_handler *handlers, - struct ieee80211_txrx_data *rx, - 
struct sta_info *sta) -{ - if (__ieee80211_invoke_rx_handlers(local, handlers, rx, sta) == - TXRX_CONTINUE) - dev_kfree_skb(rx->skb); +/* Must not be called for mdev and apdev */ +void ieee80211_if_setup(struct net_device *dev) +{ + ether_setup(dev); + dev->hard_start_xmit = ieee80211_subif_start_xmit; + dev->wireless_handlers = &ieee80211_iw_handler_def; + dev->set_multicast_list = ieee80211_set_multicast_list; + dev->change_mtu = ieee80211_change_mtu; + dev->get_stats = ieee80211_get_stats; + dev->open = ieee80211_open; + dev->stop = ieee80211_stop; + dev->uninit = ieee80211_if_reinit; + dev->destructor = ieee80211_if_free; } -/* - * This is the receive path handler. It is called by a low level driver when an - * 802.11 MPDU is received from the hardware. - */ -void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, - struct ieee80211_rx_status *status) +/* WDS specialties */ + +int ieee80211_if_update_wds(struct net_device *dev, u8 *remote_addr) { - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_sub_if_data *sdata; + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; - struct ieee80211_hdr *hdr; - struct ieee80211_txrx_data rx; - u16 type; - int multicast; - int radiotap_len = 0; - - if (status->flag & RX_FLAG_RADIOTAP) { - radiotap_len = ieee80211_get_radiotap_len(skb); - skb_pull(skb, radiotap_len); - } - - hdr = (struct ieee80211_hdr *) skb->data; - memset(&rx, 0, sizeof(rx)); - rx.skb = skb; - rx.local = local; - - rx.u.rx.status = status; - rx.fc = skb->len >= 2 ? 
le16_to_cpu(hdr->frame_control) : 0; - type = rx.fc & IEEE80211_FCTL_FTYPE; - if (type == IEEE80211_FTYPE_DATA || type == IEEE80211_FTYPE_MGMT) - local->dot11ReceivedFragmentCount++; - multicast = is_multicast_ether_addr(hdr->addr1); - - if (skb->len >= 16) - sta = rx.sta = sta_info_get(local, hdr->addr2); - else - sta = rx.sta = NULL; - - if (sta) { - rx.dev = sta->dev; - rx.sdata = IEEE80211_DEV_TO_SUB_IF(rx.dev); - } - - if ((status->flag & RX_FLAG_MMIC_ERROR)) { - ieee80211_rx_michael_mic_report(local->mdev, hdr, sta, &rx); - goto end; - } - if (unlikely(local->sta_scanning)) - rx.u.rx.in_scan = 1; + if (compare_ether_addr(remote_addr, sdata->u.wds.remote_addr) == 0) + return 0; - if (__ieee80211_invoke_rx_handlers(local, local->rx_pre_handlers, &rx, - sta) != TXRX_CONTINUE) - goto end; - skb = rx.skb; + /* Create STA entry for the new peer */ + sta = sta_info_add(local, dev, remote_addr, GFP_KERNEL); + if (!sta) + return -ENOMEM; + sta_info_put(sta); - skb_push(skb, radiotap_len); - if (sta && !sta->assoc_ap && !(sta->flags & WLAN_STA_WDS) && - !local->iff_promiscs && !multicast) { - rx.u.rx.ra_match = 1; - ieee80211_invoke_rx_handlers(local, local->rx_handlers, &rx, - sta); + /* Remove STA entry for the old peer */ + sta = sta_info_get(local, sdata->u.wds.remote_addr); + if (sta) { + sta_info_free(sta); + sta_info_put(sta); } else { - struct ieee80211_sub_if_data *prev = NULL; - struct sk_buff *skb_new; - u8 *bssid = ieee80211_get_bssid(hdr, skb->len - radiotap_len); - - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) { - rx.u.rx.ra_match = 1; - switch (sdata->type) { - case IEEE80211_IF_TYPE_STA: - if (!bssid) - continue; - if (!ieee80211_bssid_match(bssid, - sdata->u.sta.bssid)) { - if (!rx.u.rx.in_scan) - continue; - rx.u.rx.ra_match = 0; - } else if (!multicast && - compare_ether_addr(sdata->dev->dev_addr, - hdr->addr1) != 0) { - if (!sdata->promisc) - continue; - rx.u.rx.ra_match = 0; - } - break; - case 
IEEE80211_IF_TYPE_IBSS: - if (!bssid) - continue; - if (!ieee80211_bssid_match(bssid, - sdata->u.sta.bssid)) { - if (!rx.u.rx.in_scan) - continue; - rx.u.rx.ra_match = 0; - } else if (!multicast && - compare_ether_addr(sdata->dev->dev_addr, - hdr->addr1) != 0) { - if (!sdata->promisc) - continue; - rx.u.rx.ra_match = 0; - } else if (!sta) - sta = rx.sta = - ieee80211_ibss_add_sta(sdata->dev, - skb, bssid, - hdr->addr2); - break; - case IEEE80211_IF_TYPE_AP: - if (!bssid) { - if (compare_ether_addr(sdata->dev->dev_addr, - hdr->addr1) != 0) - continue; - } else if (!ieee80211_bssid_match(bssid, - sdata->dev->dev_addr)) { - if (!rx.u.rx.in_scan) - continue; - rx.u.rx.ra_match = 0; - } - if (sdata->dev == local->mdev && - !rx.u.rx.in_scan) - /* do not receive anything via - * master device when not scanning */ - continue; - break; - case IEEE80211_IF_TYPE_WDS: - if (bssid || - (rx.fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA) - continue; - if (compare_ether_addr(sdata->u.wds.remote_addr, - hdr->addr2) != 0) - continue; - break; - } - - if (prev) { - skb_new = skb_copy(skb, GFP_ATOMIC); - if (!skb_new) { - if (net_ratelimit()) - printk(KERN_DEBUG "%s: failed to copy " - "multicast frame for %s", - local->mdev->name, prev->dev->name); - continue; - } - rx.skb = skb_new; - rx.dev = prev->dev; - rx.sdata = prev; - ieee80211_invoke_rx_handlers(local, - local->rx_handlers, - &rx, sta); - } - prev = sdata; - } - if (prev) { - rx.skb = skb; - rx.dev = prev->dev; - rx.sdata = prev; - ieee80211_invoke_rx_handlers(local, local->rx_handlers, - &rx, sta); - } else - dev_kfree_skb(skb); - read_unlock(&local->sub_if_lock); + printk(KERN_DEBUG "%s: could not find STA entry for WDS link " + "peer " MAC_FMT "\n", + dev->name, MAC_ARG(sdata->u.wds.remote_addr)); } - end: - if (sta) - sta_info_put(sta); + /* Update WDS link data */ + memcpy(&sdata->u.wds.remote_addr, remote_addr, ETH_ALEN); + + return 0; } -EXPORT_SYMBOL(__ieee80211_rx); -static ieee80211_txrx_result 
-ieee80211_tx_h_load_stats(struct ieee80211_txrx_data *tx) +/* everything else */ + +static int __ieee80211_if_config(struct net_device *dev, + struct sk_buff *beacon, + struct ieee80211_tx_control *control) { - struct ieee80211_local *local = tx->local; - struct ieee80211_hw_mode *mode = tx->u.tx.mode; - struct sk_buff *skb = tx->skb; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u32 load = 0, hdrtime; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_if_conf conf; + static u8 scan_bssid[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; - /* TODO: this could be part of tx_status handling, so that the number - * of retries would be known; TX rate should in that case be stored - * somewhere with the packet */ - - /* Estimate total channel use caused by this frame */ - - /* 1 bit at 1 Mbit/s takes 1 usec; in channel_use values, - * 1 usec = 1/8 * (1080 / 10) = 13.5 */ - - if (mode->mode == MODE_IEEE80211A || - mode->mode == MODE_ATHEROS_TURBO || - mode->mode == MODE_ATHEROS_TURBOG || - (mode->mode == MODE_IEEE80211G && - tx->u.tx.rate->flags & IEEE80211_RATE_ERP)) - hdrtime = CHAN_UTIL_HDR_SHORT; - else - hdrtime = CHAN_UTIL_HDR_LONG; + if (!local->ops->config_interface || !netif_running(dev)) + return 0; - load = hdrtime; - if (!is_multicast_ether_addr(hdr->addr1)) - load += hdrtime; - - if (tx->u.tx.control->flags & IEEE80211_TXCTL_USE_RTS_CTS) - load += 2 * hdrtime; - else if (tx->u.tx.control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT) - load += hdrtime; - - load += skb->len * tx->u.tx.rate->rate_inv; - - if (tx->u.tx.extra_frag) { - int i; - for (i = 0; i < tx->u.tx.num_extra_frag; i++) { - load += 2 * hdrtime; - load += tx->u.tx.extra_frag[i]->len * - tx->u.tx.rate->rate; - } + memset(&conf, 0, sizeof(conf)); + conf.type = sdata->type; + if (sdata->type == IEEE80211_IF_TYPE_STA || + sdata->type == IEEE80211_IF_TYPE_IBSS) { + if 
(local->sta_scanning && + local->scan_dev == dev) + conf.bssid = scan_bssid; + else + conf.bssid = sdata->u.sta.bssid; + conf.ssid = sdata->u.sta.ssid; + conf.ssid_len = sdata->u.sta.ssid_len; + conf.generic_elem = sdata->u.sta.extra_ie; + conf.generic_elem_len = sdata->u.sta.extra_ie_len; + } else if (sdata->type == IEEE80211_IF_TYPE_AP) { + conf.ssid = sdata->u.ap.ssid; + conf.ssid_len = sdata->u.ap.ssid_len; + conf.generic_elem = sdata->u.ap.generic_elem; + conf.generic_elem_len = sdata->u.ap.generic_elem_len; + conf.beacon = beacon; + conf.beacon_control = control; } - - /* Divide channel_use by 8 to avoid wrapping around the counter */ - load >>= CHAN_UTIL_SHIFT; - local->channel_use_raw += load; - if (tx->sta) - tx->sta->channel_use_raw += load; - tx->sdata->channel_use_raw += load; - - return TXRX_CONTINUE; + return local->ops->config_interface(local_to_hw(local), + dev->ifindex, &conf); } +int ieee80211_if_config(struct net_device *dev) +{ + return __ieee80211_if_config(dev, NULL, NULL); +} -static ieee80211_txrx_result -ieee80211_rx_h_load_stats(struct ieee80211_txrx_data *rx) +int ieee80211_if_config_beacon(struct net_device *dev) { - struct ieee80211_local *local = rx->local; - struct sk_buff *skb = rx->skb; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u32 load = 0, hdrtime; - struct ieee80211_rate *rate; - struct ieee80211_hw_mode *mode = local->hw.conf.mode; - int i; + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_tx_control control; + struct sk_buff *skb; - /* Estimate total channel use caused by this frame */ + if (!(local->hw.flags & IEEE80211_HW_HOST_GEN_BEACON_TEMPLATE)) + return 0; + skb = ieee80211_beacon_get(local_to_hw(local), dev->ifindex, &control); + if (!skb) + return -ENOMEM; + return __ieee80211_if_config(dev, skb, &control); +} - if (unlikely(mode->num_rates < 0)) - return TXRX_CONTINUE; +int ieee80211_hw_config(struct ieee80211_local *local) +{ + struct ieee80211_hw_mode *mode; 
+ struct ieee80211_channel *chan; + int ret = 0; - rate = &mode->rates[0]; - for (i = 0; i < mode->num_rates; i++) { - if (mode->rates[i].val == rx->u.rx.status->rate) { - rate = &mode->rates[i]; - break; - } + if (local->sta_scanning) { + chan = local->scan_channel; + mode = local->scan_hw_mode; + } else { + chan = local->oper_channel; + mode = local->oper_hw_mode; } - /* 1 bit at 1 Mbit/s takes 1 usec; in channel_use values, - * 1 usec = 1/8 * (1080 / 10) = 13.5 */ - - if (mode->mode == MODE_IEEE80211A || - mode->mode == MODE_ATHEROS_TURBO || - mode->mode == MODE_ATHEROS_TURBOG || - (mode->mode == MODE_IEEE80211G && - rate->flags & IEEE80211_RATE_ERP)) - hdrtime = CHAN_UTIL_HDR_SHORT; - else - hdrtime = CHAN_UTIL_HDR_LONG; - - load = hdrtime; - if (!is_multicast_ether_addr(hdr->addr1)) - load += hdrtime; + local->hw.conf.channel = chan->chan; + local->hw.conf.channel_val = chan->val; + local->hw.conf.power_level = chan->power_level; + local->hw.conf.freq = chan->freq; + local->hw.conf.phymode = mode->mode; + local->hw.conf.antenna_max = chan->antenna_max; + local->hw.conf.chan = chan; + local->hw.conf.mode = mode; - load += skb->len * rate->rate_inv; +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "HW CONFIG: channel=%d freq=%d " + "phymode=%d\n", local->hw.conf.channel, local->hw.conf.freq, + local->hw.conf.phymode); +#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - /* Divide channel_use by 8 to avoid wrapping around the counter */ - load >>= CHAN_UTIL_SHIFT; - local->channel_use_raw += load; - if (rx->sta) - rx->sta->channel_use_raw += load; - rx->u.rx.load = load; + if (local->ops->config) + ret = local->ops->config(local_to_hw(local), &local->hw.conf); - return TXRX_CONTINUE; + return ret; } -static ieee80211_txrx_result -ieee80211_rx_h_if_stats(struct ieee80211_txrx_data *rx) +void ieee80211_erp_info_change_notify(struct net_device *dev, u8 changes) { - rx->sdata->channel_use_raw += rx->u.rx.load; - return TXRX_CONTINUE; + struct ieee80211_local *local 
= wdev_priv(dev->ieee80211_ptr); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (local->ops->erp_ie_changed) + local->ops->erp_ie_changed(local_to_hw(local), changes, + !!(sdata->flags & IEEE80211_SDATA_USE_PROTECTION), + !(sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE)); } -static void ieee80211_stat_refresh(unsigned long data) +void ieee80211_reset_erp_info(struct net_device *dev) { - struct ieee80211_local *local = (struct ieee80211_local *) data; - struct sta_info *sta; - struct ieee80211_sub_if_data *sdata; - - if (!local->stat_time) - return; - - /* go through all stations */ - spin_lock_bh(&local->sta_lock); - list_for_each_entry(sta, &local->sta_list, list) { - sta->channel_use = (sta->channel_use_raw / local->stat_time) / - CHAN_UTIL_PER_10MS; - sta->channel_use_raw = 0; - } - spin_unlock_bh(&local->sta_lock); - - /* go through all subinterfaces */ - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) { - sdata->channel_use = (sdata->channel_use_raw / - local->stat_time) / CHAN_UTIL_PER_10MS; - sdata->channel_use_raw = 0; - } - read_unlock(&local->sub_if_lock); - - /* hardware interface */ - local->channel_use = (local->channel_use_raw / - local->stat_time) / CHAN_UTIL_PER_10MS; - local->channel_use_raw = 0; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - local->stat_timer.expires = jiffies + HZ * local->stat_time / 100; - add_timer(&local->stat_timer); + sdata->flags &= ~(IEEE80211_SDATA_USE_PROTECTION | + IEEE80211_SDATA_SHORT_PREAMBLE); + ieee80211_erp_info_change_notify(dev, + IEEE80211_ERP_CHANGE_PROTECTION | + IEEE80211_ERP_CHANGE_PREAMBLE); } - -/* This is a version of the rx handler that can be called from hard irq - * context. 
Post the skb on the queue and schedule the tasklet */ -void ieee80211_rx_irqsafe(struct ieee80211_hw *hw, struct sk_buff *skb, - struct ieee80211_rx_status *status) +struct dev_mc_list *ieee80211_get_mc_list_item(struct ieee80211_hw *hw, + struct dev_mc_list *prev, + void **ptr) { struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata = *ptr; + struct dev_mc_list *mc; - BUILD_BUG_ON(sizeof(struct ieee80211_rx_status) > sizeof(skb->cb)); + if (!prev) { + WARN_ON(sdata); + sdata = NULL; + } + if (!prev || !prev->next) { + if (sdata) + sdata = list_entry(sdata->list.next, + struct ieee80211_sub_if_data, list); + else + sdata = list_entry(local->sub_if_list.next, + struct ieee80211_sub_if_data, list); + if (&sdata->list != &local->sub_if_list) + mc = sdata->dev->mc_list; + else + mc = NULL; + } else + mc = prev->next; - skb->dev = local->mdev; - /* copy status into skb->cb for use by tasklet */ - memcpy(skb->cb, status, sizeof(*status)); - skb->pkt_type = IEEE80211_RX_MSG; - skb_queue_tail(&local->skb_queue, skb); - tasklet_schedule(&local->tasklet); + *ptr = sdata; + return mc; } -EXPORT_SYMBOL(ieee80211_rx_irqsafe); +EXPORT_SYMBOL(ieee80211_get_mc_list_item); void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw, struct sk_buff *skb, @@ -4405,7 +864,6 @@ static void ieee80211_tasklet_handler(un } } - /* Remove added headers (e.g., QoS control), encryption header/MIC, etc. to * make a prepared TX frame (one that has been given to hw) to look like brand * new IEEE 802.11 frame that is ready to go through TX processing again. 
@@ -4420,10 +878,15 @@ static void ieee80211_remove_tx_extra(st pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; pkt_data->ifindex = control->ifindex; - pkt_data->mgmt_iface = (control->type == IEEE80211_IF_TYPE_MGMT); - pkt_data->req_tx_status = !!(control->flags & IEEE80211_TXCTL_REQ_TX_STATUS); - pkt_data->do_not_encrypt = !!(control->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT); - pkt_data->requeue = !!(control->flags & IEEE80211_TXCTL_REQUEUE); + pkt_data->flags = 0; + if (control->flags & IEEE80211_TXCTL_REQ_TX_STATUS) + pkt_data->flags |= IEEE80211_TXPD_REQ_TX_STATUS; + if (control->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT) + pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT; + if (control->flags & IEEE80211_TXCTL_REQUEUE) + pkt_data->flags |= IEEE80211_TXPD_REQUEUE; + if (control->type == IEEE80211_IF_TYPE_MGMT) + pkt_data->flags |= IEEE80211_TXPD_MGMT_IFACE; pkt_data->queue = control->queue; hdrlen = ieee80211_get_hdrlen_from_skb(skb); @@ -4431,7 +894,7 @@ static void ieee80211_remove_tx_extra(st if (!key) goto no_key; - switch (key->alg) { + switch (key->conf.alg) { case ALG_WEP: iv_len = WEP_IV_LEN; mic_len = WEP_ICV_LEN; @@ -4448,7 +911,8 @@ static void ieee80211_remove_tx_extra(st goto no_key; } - if (skb->len >= mic_len && key->force_sw_encrypt) + if (skb->len >= mic_len && + !(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) skb_trim(skb, skb->len - mic_len); if (skb->len >= iv_len && skb->len > hdrlen) { memmove(skb->data + iv_len, skb->data, hdrlen); @@ -4468,7 +932,6 @@ no_key: } } - void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, struct ieee80211_tx_status *status) { @@ -4687,164 +1150,6 @@ void ieee80211_tx_status(struct ieee8021 } EXPORT_SYMBOL(ieee80211_tx_status); -/* TODO: implement register/unregister functions for adding TX/RX handlers - * into ordered list */ - -/* rx_pre handlers don't have dev and sdata fields available in - * ieee80211_txrx_data */ -static ieee80211_rx_handler ieee80211_rx_pre_handlers[] = -{ - 
ieee80211_rx_h_parse_qos, - ieee80211_rx_h_load_stats, - NULL -}; - -static ieee80211_rx_handler ieee80211_rx_handlers[] = -{ - ieee80211_rx_h_if_stats, - ieee80211_rx_h_monitor, - ieee80211_rx_h_passive_scan, - ieee80211_rx_h_check, - ieee80211_rx_h_sta_process, - ieee80211_rx_h_ccmp_decrypt, - ieee80211_rx_h_tkip_decrypt, - ieee80211_rx_h_wep_weak_iv_detection, - ieee80211_rx_h_wep_decrypt, - ieee80211_rx_h_defragment, - ieee80211_rx_h_ps_poll, - ieee80211_rx_h_michael_mic_verify, - /* this must be after decryption - so header is counted in MPDU mic - * must be before pae and data, so QOS_DATA format frames - * are not passed to user space by these functions - */ - ieee80211_rx_h_remove_qos_control, - ieee80211_rx_h_802_1x_pae, - ieee80211_rx_h_drop_unencrypted, - ieee80211_rx_h_data, - ieee80211_rx_h_mgmt, - NULL -}; - -static ieee80211_tx_handler ieee80211_tx_handlers[] = -{ - ieee80211_tx_h_check_assoc, - ieee80211_tx_h_sequence, - ieee80211_tx_h_ps_buf, - ieee80211_tx_h_select_key, - ieee80211_tx_h_michael_mic_add, - ieee80211_tx_h_fragment, - ieee80211_tx_h_tkip_encrypt, - ieee80211_tx_h_ccmp_encrypt, - ieee80211_tx_h_wep_encrypt, - ieee80211_tx_h_rate_ctrl, - ieee80211_tx_h_misc, - ieee80211_tx_h_load_stats, - NULL -}; - - -int ieee80211_if_update_wds(struct net_device *dev, u8 *remote_addr) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct sta_info *sta; - - if (compare_ether_addr(remote_addr, sdata->u.wds.remote_addr) == 0) - return 0; - - /* Create STA entry for the new peer */ - sta = sta_info_add(local, dev, remote_addr, GFP_KERNEL); - if (!sta) - return -ENOMEM; - sta_info_put(sta); - - /* Remove STA entry for the old peer */ - sta = sta_info_get(local, sdata->u.wds.remote_addr); - if (sta) { - sta_info_put(sta); - sta_info_free(sta, 0); - } else { - printk(KERN_DEBUG "%s: could not find STA entry for WDS link " - "peer " MAC_FMT "\n", - dev->name, 
MAC_ARG(sdata->u.wds.remote_addr)); - } - - /* Update WDS link data */ - memcpy(&sdata->u.wds.remote_addr, remote_addr, ETH_ALEN); - - return 0; -} - -/* Must not be called for mdev and apdev */ -void ieee80211_if_setup(struct net_device *dev) -{ - ether_setup(dev); - dev->hard_start_xmit = ieee80211_subif_start_xmit; - dev->wireless_handlers = &ieee80211_iw_handler_def; - dev->set_multicast_list = ieee80211_set_multicast_list; - dev->change_mtu = ieee80211_change_mtu; - dev->get_stats = ieee80211_get_stats; - dev->open = ieee80211_open; - dev->stop = ieee80211_stop; - dev->uninit = ieee80211_if_reinit; - dev->destructor = ieee80211_if_free; -} - -void ieee80211_if_mgmt_setup(struct net_device *dev) -{ - ether_setup(dev); - dev->hard_start_xmit = ieee80211_mgmt_start_xmit; - dev->change_mtu = ieee80211_change_mtu_apdev; - dev->get_stats = ieee80211_get_stats; - dev->open = ieee80211_mgmt_open; - dev->stop = ieee80211_mgmt_stop; - dev->type = ARPHRD_IEEE80211_PRISM; - dev->hard_header_parse = header_parse_80211; - dev->uninit = ieee80211_if_reinit; - dev->destructor = ieee80211_if_free; -} - -int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, - const char *name) -{ - struct rate_control_ref *ref, *old; - - ASSERT_RTNL(); - if (local->open_count || netif_running(local->mdev) || - (local->apdev && netif_running(local->apdev))) - return -EBUSY; - - ref = rate_control_alloc(name, local); - if (!ref) { - printk(KERN_WARNING "%s: Failed to select rate control " - "algorithm\n", local->mdev->name); - return -ENOENT; - } - - old = local->rate_ctrl; - local->rate_ctrl = ref; - if (old) { - rate_control_put(old); - sta_info_flush(local, NULL); - } - - printk(KERN_DEBUG "%s: Selected rate control " - "algorithm '%s'\n", local->mdev->name, - ref->ops->name); - - - return 0; -} - -static void rate_control_deinitialize(struct ieee80211_local *local) -{ - struct rate_control_ref *ref; - - ref = local->rate_ctrl; - local->rate_ctrl = NULL; - rate_control_put(ref); -} - 
struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) { @@ -4927,9 +1232,6 @@ struct ieee80211_hw *ieee80211_alloc_hw( INIT_LIST_HEAD(&local->sub_if_list); INIT_DELAYED_WORK(&local->scan_work, ieee80211_sta_scan_work); - init_timer(&local->stat_timer); - local->stat_timer.function = ieee80211_stat_refresh; - local->stat_timer.data = (unsigned long) local; ieee80211_rx_bss_list_init(mdev); sta_info_init(local); @@ -5128,9 +1430,6 @@ void ieee80211_unregister_hw(struct ieee rtnl_unlock(); - if (local->stat_time) - del_timer_sync(&local->stat_timer); - ieee80211_rx_bss_list_deinit(local->mdev); ieee80211_clear_tx_pending(local); sta_info_stop(local); @@ -5165,65 +1464,6 @@ void ieee80211_free_hw(struct ieee80211_ } EXPORT_SYMBOL(ieee80211_free_hw); -void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue) -{ - struct ieee80211_local *local = hw_to_local(hw); - - if (test_and_clear_bit(IEEE80211_LINK_STATE_XOFF, - &local->state[queue])) { - if (test_bit(IEEE80211_LINK_STATE_PENDING, - &local->state[queue])) - tasklet_schedule(&local->tx_pending_tasklet); - else - if (!ieee80211_qdisc_installed(local->mdev)) { - if (queue == 0) - netif_wake_queue(local->mdev); - } else - __netif_schedule(local->mdev); - } -} -EXPORT_SYMBOL(ieee80211_wake_queue); - -void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue) -{ - struct ieee80211_local *local = hw_to_local(hw); - - if (!ieee80211_qdisc_installed(local->mdev) && queue == 0) - netif_stop_queue(local->mdev); - set_bit(IEEE80211_LINK_STATE_XOFF, &local->state[queue]); -} -EXPORT_SYMBOL(ieee80211_stop_queue); - -void ieee80211_start_queues(struct ieee80211_hw *hw) -{ - struct ieee80211_local *local = hw_to_local(hw); - int i; - - for (i = 0; i < local->hw.queues; i++) - clear_bit(IEEE80211_LINK_STATE_XOFF, &local->state[i]); - if (!ieee80211_qdisc_installed(local->mdev)) - netif_start_queue(local->mdev); -} -EXPORT_SYMBOL(ieee80211_start_queues); - -void 
ieee80211_stop_queues(struct ieee80211_hw *hw) -{ - int i; - - for (i = 0; i < hw->queues; i++) - ieee80211_stop_queue(hw, i); -} -EXPORT_SYMBOL(ieee80211_stop_queues); - -void ieee80211_wake_queues(struct ieee80211_hw *hw) -{ - int i; - - for (i = 0; i < hw->queues; i++) - ieee80211_wake_queue(hw, i); -} -EXPORT_SYMBOL(ieee80211_wake_queues); - struct net_device_stats *ieee80211_dev_stats(struct net_device *dev) { struct ieee80211_sub_if_data *sdata; @@ -5251,7 +1491,6 @@ static int __init ieee80211_init(void) return 0; } - static void __exit ieee80211_exit(void) { ieee80211_wme_unregister(); diff -puN net/mac80211/ieee80211_common.h~git-net net/mac80211/ieee80211_common.h --- a/net/mac80211/ieee80211_common.h~git-net +++ a/net/mac80211/ieee80211_common.h @@ -48,13 +48,13 @@ enum ieee80211_msg_type { ieee80211_msg_tx_callback_ack = 1, ieee80211_msg_tx_callback_fail = 2, /* hole at 3, was ieee80211_msg_passive_scan but unused */ - ieee80211_msg_wep_frame_unknown_key = 4, + /* hole at 4, was ieee80211_msg_wep_frame_unknown_key but now unused */ ieee80211_msg_michael_mic_failure = 5, /* hole at 6, was monitor but never sent to userspace */ ieee80211_msg_sta_not_assoc = 7, /* 8 was ieee80211_msg_set_aid_for_sta */ ieee80211_msg_key_threshold_notification = 9, - ieee80211_msg_radar = 11, + /* 11 was ieee80211_msg_radar */ }; struct ieee80211_msg_key_notification { diff -puN net/mac80211/ieee80211_i.h~git-net net/mac80211/ieee80211_i.h --- a/net/mac80211/ieee80211_i.h~git-net +++ a/net/mac80211/ieee80211_i.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "ieee80211_key.h" #include "sta_info.h" @@ -112,6 +113,15 @@ typedef enum { TXRX_CONTINUE, TXRX_DROP, TXRX_QUEUED } ieee80211_txrx_result; +/* flags used in struct ieee80211_txrx_data.flags */ +/* whether the MSDU was fragmented */ +#define IEEE80211_TXRXD_FRAGMENTED BIT(0) +#define IEEE80211_TXRXD_TXUNICAST BIT(1) +#define IEEE80211_TXRXD_TXPS_BUFFERED BIT(2) +#define 
IEEE80211_TXRXD_TXPROBE_LAST_FRAG BIT(3) +#define IEEE80211_TXRXD_RXIN_SCAN BIT(4) +/* frame is destined to interface currently processed (incl. multicast frames) */ +#define IEEE80211_TXRXD_RXRA_MATCH BIT(5) struct ieee80211_txrx_data { struct sk_buff *skb; struct net_device *dev; @@ -120,14 +130,10 @@ struct ieee80211_txrx_data { struct sta_info *sta; u16 fc, ethertype; struct ieee80211_key *key; - unsigned int fragmented:1; /* whether the MSDU was fragmented */ + unsigned int flags; union { struct { struct ieee80211_tx_control *control; - unsigned int unicast:1; - unsigned int ps_buffered:1; - unsigned int short_preamble:1; - unsigned int probe_last_frag:1; struct ieee80211_hw_mode *mode; struct ieee80211_rate *rate; /* use this rate (if set) for last fragment; rate can @@ -147,23 +153,21 @@ struct ieee80211_txrx_data { int sent_ps_buffered; int queue; int load; - unsigned int in_scan:1; - /* frame is destined to interface currently processed - * (including multicast frames) */ - unsigned int ra_match:1; } rx; } u; }; +/* flags used in struct ieee80211_tx_packet_data.flags */ +#define IEEE80211_TXPD_REQ_TX_STATUS BIT(0) +#define IEEE80211_TXPD_DO_NOT_ENCRYPT BIT(1) +#define IEEE80211_TXPD_REQUEUE BIT(2) +#define IEEE80211_TXPD_MGMT_IFACE BIT(3) /* Stored in sk_buff->cb */ struct ieee80211_tx_packet_data { int ifindex; unsigned long jiffies; - unsigned int req_tx_status:1; - unsigned int do_not_encrypt:1; - unsigned int requeue:1; - unsigned int mgmt_iface:1; - unsigned int queue:4; + unsigned int flags; + u8 queue; }; struct ieee80211_tx_stored_packet { @@ -174,7 +178,7 @@ struct ieee80211_tx_stored_packet { int last_frag_rateidx; int last_frag_hwrate; struct ieee80211_rate *last_frag_rate; - unsigned int last_frag_rate_ctrl_probe:1; + unsigned int last_frag_rate_ctrl_probe; }; typedef ieee80211_txrx_result (*ieee80211_tx_handler) @@ -213,6 +217,19 @@ struct ieee80211_if_vlan { u8 id; }; +/* flags used in struct ieee80211_if_sta.flags */ +#define 
IEEE80211_STA_SSID_SET BIT(0) +#define IEEE80211_STA_BSSID_SET BIT(1) +#define IEEE80211_STA_PREV_BSSID_SET BIT(2) +#define IEEE80211_STA_AUTHENTICATED BIT(3) +#define IEEE80211_STA_ASSOCIATED BIT(4) +#define IEEE80211_STA_PROBEREQ_POLL BIT(5) +#define IEEE80211_STA_CREATE_IBSS BIT(6) +#define IEEE80211_STA_MIXED_CELL BIT(7) +#define IEEE80211_STA_WMM_ENABLED BIT(8) +#define IEEE80211_STA_AUTO_SSID_SEL BIT(10) +#define IEEE80211_STA_AUTO_BSSID_SEL BIT(11) +#define IEEE80211_STA_AUTO_CHANNEL_SEL BIT(12) struct ieee80211_if_sta { enum { IEEE80211_DISABLED, IEEE80211_AUTHENTICATE, @@ -235,25 +252,14 @@ struct ieee80211_if_sta { int auth_tries, assoc_tries; - unsigned int ssid_set:1; - unsigned int bssid_set:1; - unsigned int prev_bssid_set:1; - unsigned int authenticated:1; - unsigned int associated:1; - unsigned int probereq_poll:1; - unsigned int create_ibss:1; - unsigned int mixed_cell:1; - unsigned int wmm_enabled:1; - unsigned int auto_ssid_sel:1; - unsigned int auto_bssid_sel:1; - unsigned int auto_channel_sel:1; + unsigned int flags; #define IEEE80211_STA_REQ_SCAN 0 #define IEEE80211_STA_REQ_AUTH 1 #define IEEE80211_STA_REQ_RUN 2 unsigned long request; struct sk_buff_head skb_queue; - int key_mgmt; + int key_management_enabled; unsigned long last_probe; #define IEEE80211_AUTH_ALG_OPEN BIT(0) @@ -271,19 +277,29 @@ struct ieee80211_if_sta { }; +/* flags used in struct ieee80211_sub_if_data.flags */ +#define IEEE80211_SDATA_ALLMULTI BIT(0) +#define IEEE80211_SDATA_PROMISC BIT(1) +#define IEEE80211_SDATA_USE_PROTECTION BIT(2) /* CTS protect ERP frames */ +/* use short preamble with IEEE 802.11b: this flag is set when the AP or beacon + * generator reports that there are no present stations that cannot support short + * preambles */ +#define IEEE80211_SDATA_SHORT_PREAMBLE BIT(3) struct ieee80211_sub_if_data { struct list_head list; unsigned int type; struct wireless_dev wdev; + /* keys */ + struct list_head key_list; + struct net_device *dev; struct ieee80211_local 
*local; int mc_count; - unsigned int allmulti:1; - unsigned int promisc:1; - unsigned int use_protection:1; /* CTS protect ERP frames */ + + unsigned int flags; struct net_device_stats stats; int drop_unencrypted; @@ -416,10 +432,9 @@ struct ieee80211_local { struct sk_buff_head skb_queue_unreliable; /* Station data structures */ - spinlock_t sta_lock; /* mutex for STA data structures */ + rwlock_t sta_lock; /* protects STA data structures */ int num_sta; /* number of stations in sta_list */ struct list_head sta_list; - struct list_head deleted_sta_list; struct sta_info *sta_hash[STA_HASH_SIZE]; struct timer_list sta_cleanup; @@ -447,7 +462,6 @@ struct ieee80211_local { int fragmentation_threshold; int short_retry_limit; /* dot11ShortRetryLimit */ int long_retry_limit; /* dot11LongRetryLimit */ - int short_preamble; /* use short preamble with IEEE 802.11b */ struct crypto_blkcipher *wep_tx_tfm; struct crypto_blkcipher *wep_rx_tfm; @@ -506,19 +520,11 @@ struct ieee80211_local { u32 channel_use; u32 channel_use_raw; - u32 stat_time; - struct timer_list stat_timer; #ifdef CONFIG_MAC80211_DEBUGFS struct work_struct sta_debugfs_add; #endif - enum { - STA_ANTENNA_SEL_AUTO = 0, - STA_ANTENNA_SEL_SW_CTRL = 1, - STA_ANTENNA_SEL_SW_CTRL_DEBUG = 2 - } sta_antenna_sel; - #ifdef CONFIG_MAC80211_DEBUG_COUNTERS /* TX/RX handler statistics */ unsigned int tx_handlers_drop; @@ -548,16 +554,9 @@ struct ieee80211_local { #endif /* CONFIG_MAC80211_DEBUG_COUNTERS */ - int default_wep_only; /* only default WEP keys are used with this - * interface; this is used to decide when hwaccel - * can be used with default keys */ int total_ps_buffered; /* total number of all buffered unicast and * multicast packets for power saving stations */ - int allow_broadcast_always; /* whether to allow TX of broadcast frames - * even when there are no associated STAs - */ - int wifi_wme_noack_test; unsigned int wmm_acm; /* bit field of ACM bits (BIT(802.1D tag)) */ @@ -572,7 +571,6 @@ struct 
ieee80211_local { struct local_debugfsdentries { struct dentry *channel; struct dentry *frequency; - struct dentry *radar_detect; struct dentry *antenna_sel_tx; struct dentry *antenna_sel_rx; struct dentry *bridge_packets; @@ -584,7 +582,6 @@ struct ieee80211_local { struct dentry *total_ps_buffered; struct dentry *mode; struct dentry *wep_iv; - struct dentry *tx_power_reduction; struct dentry *modes; struct dentry *statistics; struct local_debugfsdentries_statsdentries { @@ -668,9 +665,9 @@ static inline void __bss_tim_set(struct static inline void bss_tim_set(struct ieee80211_local *local, struct ieee80211_if_ap *bss, int aid) { - spin_lock_bh(&local->sta_lock); + read_lock_bh(&local->sta_lock); __bss_tim_set(bss, aid); - spin_unlock_bh(&local->sta_lock); + read_unlock_bh(&local->sta_lock); } static inline void __bss_tim_clear(struct ieee80211_if_ap *bss, int aid) @@ -685,9 +682,9 @@ static inline void __bss_tim_clear(struc static inline void bss_tim_clear(struct ieee80211_local *local, struct ieee80211_if_ap *bss, int aid) { - spin_lock_bh(&local->sta_lock); + read_lock_bh(&local->sta_lock); __bss_tim_clear(bss, aid); - spin_unlock_bh(&local->sta_lock); + read_unlock_bh(&local->sta_lock); } /** @@ -707,35 +704,35 @@ static inline int ieee80211_is_erp_rate( return 0; } +static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr) +{ + return compare_ether_addr(raddr, addr) == 0 || + is_broadcast_ether_addr(raddr); +} + + /* ieee80211.c */ int ieee80211_hw_config(struct ieee80211_local *local); int ieee80211_if_config(struct net_device *dev); int ieee80211_if_config_beacon(struct net_device *dev); -struct ieee80211_key_conf * -ieee80211_key_data2conf(struct ieee80211_local *local, - const struct ieee80211_key *data); -struct ieee80211_key *ieee80211_key_alloc(struct ieee80211_sub_if_data *sdata, - int idx, size_t key_len, gfp_t flags); -void ieee80211_key_free(struct ieee80211_key *key); void ieee80211_rx_mgmt(struct ieee80211_local *local, struct 
sk_buff *skb, struct ieee80211_rx_status *status, u32 msg_type); void ieee80211_prepare_rates(struct ieee80211_local *local, struct ieee80211_hw_mode *mode); void ieee80211_tx_set_iswep(struct ieee80211_txrx_data *tx); int ieee80211_if_update_wds(struct net_device *dev, u8 *remote_addr); -int ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev); -int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev); void ieee80211_if_setup(struct net_device *dev); void ieee80211_if_mgmt_setup(struct net_device *dev); -int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, - const char *name); struct net_device_stats *ieee80211_dev_stats(struct net_device *dev); +struct ieee80211_rate *ieee80211_get_rate(struct ieee80211_local *local, + int phymode, int hwrate); +void ieee80211_key_threshold_notify(struct net_device *dev, + struct ieee80211_key *key, + struct sta_info *sta); /* ieee80211_ioctl.c */ extern const struct iw_handler_def ieee80211_iw_handler_def; -void ieee80211_update_default_wep_only(struct ieee80211_local *local); - /* Least common multiple of the used rates (in 100 kbps). This is used to * calculate rate_inv values for each rate so that only integers are needed. 
*/ @@ -783,6 +780,8 @@ struct sta_info * ieee80211_ibss_add_sta u8 *addr); int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason); int ieee80211_sta_disassociate(struct net_device *dev, u16 reason); +void ieee80211_erp_info_change_notify(struct net_device *dev, u8 changes); +void ieee80211_reset_erp_info(struct net_device *dev); /* ieee80211_iface.c */ int ieee80211_if_add(struct net_device *dev, const char *name, @@ -801,7 +800,28 @@ void ieee80211_if_del_mgmt(struct ieee80 void ieee80211_regdomain_init(void); void ieee80211_set_default_regdomain(struct ieee80211_hw_mode *mode); -/* for wiphy privid */ -extern void *mac80211_wiphy_privid; +/* rx handling */ +extern ieee80211_rx_handler ieee80211_rx_pre_handlers[]; +extern ieee80211_rx_handler ieee80211_rx_handlers[]; + +/* tx handling */ +extern ieee80211_tx_handler ieee80211_tx_handlers[]; +void ieee80211_clear_tx_pending(struct ieee80211_local *local); +void ieee80211_tx_pending(unsigned long data); +int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev); +int ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev); +int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev); +int ieee80211_mgmt_start_xmit(struct sk_buff *skb, struct net_device *dev); + +/* utility functions/constants */ +extern void *mac80211_wiphy_privid; /* for wiphy privid */ +extern const unsigned char rfc1042_header[6]; +extern const unsigned char bridge_tunnel_header[6]; +u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len); +int ieee80211_is_eapol(const struct sk_buff *skb); +int ieee80211_frame_duration(struct ieee80211_local *local, size_t len, + int rate, int erp, int short_preamble); +void mac80211_ev_michael_mic_failure(struct net_device *dev, int keyidx, + struct ieee80211_hdr *hdr); #endif /* IEEE80211_I_H */ diff -puN net/mac80211/ieee80211_iface.c~git-net net/mac80211/ieee80211_iface.c --- a/net/mac80211/ieee80211_iface.c~git-net +++ 
a/net/mac80211/ieee80211_iface.c @@ -25,6 +25,8 @@ void ieee80211_if_sdata_init(struct ieee sdata->eapol = 1; for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) skb_queue_head_init(&sdata->fragments[i].skb_list); + + INIT_LIST_HEAD(&sdata->key_list); } static void ieee80211_if_sdata_deinit(struct ieee80211_sub_if_data *sdata) @@ -88,8 +90,6 @@ int ieee80211_if_add(struct net_device * *new_dev = ndev; write_unlock_bh(&local->sub_if_lock); - ieee80211_update_default_wep_only(local); - return 0; fail: @@ -154,7 +154,6 @@ void ieee80211_if_del_mgmt(struct ieee80 void ieee80211_if_set_type(struct net_device *dev, int type) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); int oldtype = sdata->type; dev->hard_start_xmit = ieee80211_subif_start_xmit; @@ -187,10 +186,10 @@ void ieee80211_if_set_type(struct net_de ifsta->capab = WLAN_CAPABILITY_ESS; ifsta->auth_algs = IEEE80211_AUTH_ALG_OPEN | IEEE80211_AUTH_ALG_SHARED_KEY; - ifsta->create_ibss = 1; - ifsta->wmm_enabled = 1; - ifsta->auto_channel_sel = 1; - ifsta->auto_bssid_sel = 1; + ifsta->flags |= IEEE80211_STA_CREATE_IBSS | + IEEE80211_STA_WMM_ENABLED | + IEEE80211_STA_AUTO_BSSID_SEL | + IEEE80211_STA_AUTO_CHANNEL_SEL; msdata = IEEE80211_DEV_TO_SUB_IF(sdata->local->mdev); sdata->bss = &msdata->u.ap; @@ -205,7 +204,6 @@ void ieee80211_if_set_type(struct net_de dev->name, __FUNCTION__, type); } ieee80211_debugfs_change_if_type(sdata, oldtype); - ieee80211_update_default_wep_only(local); } /* Must be called with rtnl lock held. 
*/ @@ -214,24 +212,12 @@ void ieee80211_if_reinit(struct net_devi struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; - int i; ASSERT_RTNL(); + + ieee80211_free_keys(sdata); + ieee80211_if_sdata_deinit(sdata); - for (i = 0; i < NUM_DEFAULT_KEYS; i++) { - if (!sdata->keys[i]) - continue; -#if 0 - /* The interface is down at the moment, so there is not - * really much point in disabling the keys at this point. */ - memset(addr, 0xff, ETH_ALEN); - if (local->ops->set_key) - local->ops->set_key(local_to_hw(local), DISABLE_KEY, addr, - local->keys[i], 0); -#endif - ieee80211_key_free(sdata->keys[i]); - sdata->keys[i] = NULL; - } switch (sdata->type) { case IEEE80211_IF_TYPE_AP: { @@ -272,8 +258,8 @@ void ieee80211_if_reinit(struct net_devi case IEEE80211_IF_TYPE_WDS: sta = sta_info_get(local, sdata->u.wds.remote_addr); if (sta) { + sta_info_free(sta); sta_info_put(sta); - sta_info_free(sta, 0); } else { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: Someone had deleted my STA " @@ -335,7 +321,6 @@ int ieee80211_if_remove(struct net_devic list_del(&sdata->list); write_unlock_bh(&local->sub_if_lock); __ieee80211_if_del(local, sdata); - ieee80211_update_default_wep_only(local); return 0; } } diff -puN net/mac80211/ieee80211_ioctl.c~git-net net/mac80211/ieee80211_ioctl.c --- a/net/mac80211/ieee80211_ioctl.c~git-net +++ a/net/mac80211/ieee80211_ioctl.c @@ -25,37 +25,6 @@ #include "ieee80211_rate.h" #include "wpa.h" #include "aes_ccm.h" -#include "debugfs_key.h" - -static void ieee80211_set_hw_encryption(struct net_device *dev, - struct sta_info *sta, u8 addr[ETH_ALEN], - struct ieee80211_key *key) -{ - struct ieee80211_key_conf *keyconf = NULL; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - - /* default to sw encryption; this will be cleared by low-level - * driver if the hw supports requested encryption */ - if (key) - key->force_sw_encrypt = 
1; - - if (key && local->ops->set_key && - (keyconf = ieee80211_key_data2conf(local, key))) { - if (local->ops->set_key(local_to_hw(local), SET_KEY, addr, - keyconf, sta ? sta->aid : 0)) { - key->force_sw_encrypt = 1; - key->hw_key_idx = HW_KEY_IDX_INVALID; - } else { - key->force_sw_encrypt = - !!(keyconf->flags & IEEE80211_KEY_FORCE_SW_ENCRYPT); - key->hw_key_idx = - keyconf->hw_key_idx; - - } - } - kfree(keyconf); -} - static int ieee80211_set_encryption(struct net_device *dev, u8 *sta_addr, int idx, int alg, int set_tx_key, @@ -64,9 +33,7 @@ static int ieee80211_set_encryption(stru struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); int ret = 0; struct sta_info *sta; - struct ieee80211_key *key, *old_key; - int try_hwaccel = 1; - struct ieee80211_key_conf *keyconf; + struct ieee80211_key *key; struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -79,16 +46,6 @@ static int ieee80211_set_encryption(stru return -EINVAL; } key = sdata->keys[idx]; - - /* TODO: consider adding hwaccel support for these; at least - * Atheros key cache should be able to handle this since AP is - * only transmitting frames with default keys. */ - /* FIX: hw key cache can be used when only one virtual - * STA is associated with each AP. If more than one STA - * is associated to the same AP, software encryption - * must be used. This should be done automatically - * based on configured station devices. For the time - * being, this can be only set at compile time. */ } else { set_tx_key = 0; if (idx != 0) { @@ -111,144 +68,28 @@ static int ieee80211_set_encryption(stru key = sta->key; } - /* FIX: - * Cannot configure default hwaccel keys with WEP algorithm, if - * any of the virtual interfaces is using static WEP - * configuration because hwaccel would otherwise try to decrypt - * these frames. - * - * For now, just disable WEP hwaccel for broadcast when there is - * possibility of conflict with default keys. 
This can maybe later be - * optimized by using non-default keys (at least with Atheros ar521x). - */ - if (!sta && alg == ALG_WEP && !local->default_wep_only && - sdata->type != IEEE80211_IF_TYPE_IBSS && - sdata->type != IEEE80211_IF_TYPE_AP) { - try_hwaccel = 0; - } - - if (local->hw.flags & IEEE80211_HW_DEVICE_HIDES_WEP) { - /* Software encryption cannot be used with devices that hide - * encryption from the host system, so always try to use - * hardware acceleration with such devices. */ - try_hwaccel = 1; - } - - if ((local->hw.flags & IEEE80211_HW_NO_TKIP_WMM_HWACCEL) && - alg == ALG_TKIP) { - if (sta && (sta->flags & WLAN_STA_WME)) { - /* Hardware does not support hwaccel with TKIP when using WMM. - */ - try_hwaccel = 0; - } - else if (sdata->type == IEEE80211_IF_TYPE_STA) { - sta = sta_info_get(local, sdata->u.sta.bssid); - if (sta) { - if (sta->flags & WLAN_STA_WME) { - try_hwaccel = 0; - } - sta_info_put(sta); - sta = NULL; - } - } - } - if (alg == ALG_NONE) { - keyconf = NULL; - if (try_hwaccel && key && - key->hw_key_idx != HW_KEY_IDX_INVALID && - local->ops->set_key && - (keyconf = ieee80211_key_data2conf(local, key)) != NULL && - local->ops->set_key(local_to_hw(local), DISABLE_KEY, - sta_addr, keyconf, sta ? sta->aid : 0)) { - printk(KERN_DEBUG "%s: set_encrypt - low-level disable" - " failed\n", dev->name); - ret = -EINVAL; - } - kfree(keyconf); - - if (set_tx_key || sdata->default_key == key) { - ieee80211_debugfs_key_remove_default(sdata); - sdata->default_key = NULL; - } - ieee80211_debugfs_key_remove(key); - if (sta) - sta->key = NULL; - else - sdata->keys[idx] = NULL; ieee80211_key_free(key); key = NULL; } else { - old_key = key; - key = ieee80211_key_alloc(sta ? NULL : sdata, idx, key_len, - GFP_KERNEL); + /* + * Need to free it before allocating a new one with + * with the same index or the ordering to the driver's + * set_key() callback becomes confused. 
+ */ + ieee80211_key_free(key); + key = ieee80211_key_alloc(sdata, sta, alg, idx, key_len, _key); if (!key) { ret = -ENOMEM; goto err_out; } - - /* default to sw encryption; low-level driver sets these if the - * requested encryption is supported */ - key->hw_key_idx = HW_KEY_IDX_INVALID; - key->force_sw_encrypt = 1; - - key->alg = alg; - key->keyidx = idx; - key->keylen = key_len; - memcpy(key->key, _key, key_len); - if (set_tx_key) - key->default_tx_key = 1; - - if (alg == ALG_CCMP) { - /* Initialize AES key state here as an optimization - * so that it does not need to be initialized for every - * packet. */ - key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt( - key->key); - if (!key->u.ccmp.tfm) { - ret = -ENOMEM; - goto err_free; - } - } - - if (set_tx_key || sdata->default_key == old_key) { - ieee80211_debugfs_key_remove_default(sdata); - sdata->default_key = NULL; - } - ieee80211_debugfs_key_remove(old_key); - if (sta) - sta->key = key; - else - sdata->keys[idx] = key; - ieee80211_key_free(old_key); - ieee80211_debugfs_key_add(local, key); - if (sta) - ieee80211_debugfs_key_sta_link(key, sta); - - if (try_hwaccel && - (alg == ALG_WEP || alg == ALG_TKIP || alg == ALG_CCMP)) - ieee80211_set_hw_encryption(dev, sta, sta_addr, key); - } - - if (set_tx_key || (!sta && !sdata->default_key && key)) { - sdata->default_key = key; - if (key) - ieee80211_debugfs_key_add_default(sdata); - - if (local->ops->set_key_idx && - local->ops->set_key_idx(local_to_hw(local), idx)) - printk(KERN_DEBUG "%s: failed to set TX key idx for " - "low-level driver\n", dev->name); } - if (sta) - sta_info_put(sta); - - return 0; + if (set_tx_key || (!sta && !sdata->default_key && key)) + ieee80211_set_default_key(sdata, idx); -err_free: - ieee80211_key_free(key); -err_out: + ret = 0; + err_out: if (sta) sta_info_put(sta); return ret; @@ -270,7 +111,7 @@ static int ieee80211_ioctl_siwgenie(stru int ret = ieee80211_sta_set_extra_ie(dev, extra, data->length); if (ret) return ret; - 
sdata->u.sta.auto_bssid_sel = 0; + sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; ieee80211_sta_req_auth(dev, &sdata->u.sta); return 0; } @@ -287,16 +128,6 @@ static int ieee80211_ioctl_siwgenie(stru return -EOPNOTSUPP; } -static int ieee80211_ioctl_set_radio_enabled(struct net_device *dev, - int val) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_conf *conf = &local->hw.conf; - - conf->radio_enabled = val; - return ieee80211_hw_config(wdev_priv(dev->ieee80211_ptr)); -} - static int ieee80211_ioctl_giwname(struct net_device *dev, struct iw_request_info *info, char *name, char *extra) @@ -512,13 +343,14 @@ static int ieee80211_ioctl_siwfreq(struc struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->type == IEEE80211_IF_TYPE_STA) - sdata->u.sta.auto_channel_sel = 0; + sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_CHANNEL_SEL; /* freq->e == 0: freq->m = channel; otherwise freq = m * 10^e */ if (freq->e == 0) { if (freq->m < 0) { if (sdata->type == IEEE80211_IF_TYPE_STA) - sdata->u.sta.auto_channel_sel = 1; + sdata->u.sta.flags |= + IEEE80211_STA_AUTO_CHANNEL_SEL; return 0; } else return ieee80211_set_channel(local, freq->m, -1); @@ -573,7 +405,10 @@ static int ieee80211_ioctl_siwessid(stru sdata->u.sta.ssid_len = len; return 0; } - sdata->u.sta.auto_ssid_sel = !data->flags; + if (data->flags) + sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_SSID_SEL; + else + sdata->u.sta.flags |= IEEE80211_STA_AUTO_SSID_SEL; ret = ieee80211_sta_set_ssid(dev, ssid, len); if (ret) return ret; @@ -640,13 +475,13 @@ static int ieee80211_ioctl_siwap(struct ETH_ALEN); return 0; } - if (is_zero_ether_addr((u8 *) &ap_addr->sa_data)) { - sdata->u.sta.auto_bssid_sel = 1; - sdata->u.sta.auto_channel_sel = 1; - } else if (is_broadcast_ether_addr((u8 *) &ap_addr->sa_data)) - sdata->u.sta.auto_bssid_sel = 1; + if (is_zero_ether_addr((u8 *) &ap_addr->sa_data)) + sdata->u.sta.flags |= IEEE80211_STA_AUTO_BSSID_SEL | + 
IEEE80211_STA_AUTO_CHANNEL_SEL; + else if (is_broadcast_ether_addr((u8 *) &ap_addr->sa_data)) + sdata->u.sta.flags |= IEEE80211_STA_AUTO_BSSID_SEL; else - sdata->u.sta.auto_bssid_sel = 0; + sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; ret = ieee80211_sta_set_bssid(dev, (u8 *) &ap_addr->sa_data); if (ret) return ret; @@ -798,6 +633,20 @@ static int ieee80211_ioctl_giwrate(struc return 0; } +static int ieee80211_ioctl_giwtxpower(struct net_device *dev, + struct iw_request_info *info, + union iwreq_data *data, char *extra) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + + data->txpower.fixed = 1; + data->txpower.disabled = !(local->hw.conf.radio_enabled); + data->txpower.value = local->hw.conf.power_level; + data->txpower.flags = IW_TXPOW_DBM; + + return 0; +} + static int ieee80211_ioctl_siwrts(struct net_device *dev, struct iw_request_info *info, struct iw_param *rts, char *extra) @@ -930,101 +779,6 @@ static int ieee80211_ioctl_giwretry(stru return 0; } -static void ieee80211_key_enable_hwaccel(struct ieee80211_local *local, - struct ieee80211_key *key) -{ - struct ieee80211_key_conf *keyconf; - u8 addr[ETH_ALEN]; - - if (!key || key->alg != ALG_WEP || !key->force_sw_encrypt || - (local->hw.flags & IEEE80211_HW_DEVICE_HIDES_WEP)) - return; - - memset(addr, 0xff, ETH_ALEN); - keyconf = ieee80211_key_data2conf(local, key); - if (keyconf && local->ops->set_key && - local->ops->set_key(local_to_hw(local), - SET_KEY, addr, keyconf, 0) == 0) { - key->force_sw_encrypt = - !!(keyconf->flags & IEEE80211_KEY_FORCE_SW_ENCRYPT); - key->hw_key_idx = keyconf->hw_key_idx; - } - kfree(keyconf); -} - - -static void ieee80211_key_disable_hwaccel(struct ieee80211_local *local, - struct ieee80211_key *key) -{ - struct ieee80211_key_conf *keyconf; - u8 addr[ETH_ALEN]; - - if (!key || key->alg != ALG_WEP || key->force_sw_encrypt || - (local->hw.flags & IEEE80211_HW_DEVICE_HIDES_WEP)) - return; - - memset(addr, 0xff, ETH_ALEN); - keyconf = 
ieee80211_key_data2conf(local, key); - if (keyconf && local->ops->set_key) - local->ops->set_key(local_to_hw(local), DISABLE_KEY, - addr, keyconf, 0); - kfree(keyconf); - key->force_sw_encrypt = 1; -} - - -static int ieee80211_ioctl_default_wep_only(struct ieee80211_local *local, - int value) -{ - int i; - struct ieee80211_sub_if_data *sdata; - - local->default_wep_only = value; - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) - for (i = 0; i < NUM_DEFAULT_KEYS; i++) - if (value) - ieee80211_key_enable_hwaccel(local, - sdata->keys[i]); - else - ieee80211_key_disable_hwaccel(local, - sdata->keys[i]); - read_unlock(&local->sub_if_lock); - - return 0; -} - - -void ieee80211_update_default_wep_only(struct ieee80211_local *local) -{ - int i = 0; - struct ieee80211_sub_if_data *sdata; - - read_lock(&local->sub_if_lock); - list_for_each_entry(sdata, &local->sub_if_list, list) { - - if (sdata->dev == local->mdev) - continue; - - /* If there is an AP interface then depend on userspace to - set default_wep_only correctly. 
*/ - if (sdata->type == IEEE80211_IF_TYPE_AP) { - read_unlock(&local->sub_if_lock); - return; - } - - i++; - } - - read_unlock(&local->sub_if_lock); - - if (i <= 1) - ieee80211_ioctl_default_wep_only(local, 1); - else - ieee80211_ioctl_default_wep_only(local, 0); -} - - static int ieee80211_ioctl_prism2_param(struct net_device *dev, struct iw_request_info *info, void *wrqu, char *extra) @@ -1054,25 +808,31 @@ static int ieee80211_ioctl_prism2_param( break; case PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES: - if (sdata->type != IEEE80211_IF_TYPE_AP) + if (sdata->type == IEEE80211_IF_TYPE_AP) { + if (value) + sdata->flags |= IEEE80211_SDATA_USE_PROTECTION; + else + sdata->flags &= ~IEEE80211_SDATA_USE_PROTECTION; + ieee80211_erp_info_change_notify(dev, + IEEE80211_ERP_CHANGE_PROTECTION); + } else { ret = -ENOENT; - else - sdata->use_protection = value; + } break; case PRISM2_PARAM_PREAMBLE: - local->short_preamble = value; - break; - - case PRISM2_PARAM_STAT_TIME: - if (!local->stat_time && value) { - local->stat_timer.expires = jiffies + HZ * value / 100; - add_timer(&local->stat_timer); - } else if (local->stat_time && !value) { - del_timer_sync(&local->stat_timer); + if (sdata->type == IEEE80211_IF_TYPE_AP) { + if (value) + sdata->flags |= IEEE80211_SDATA_SHORT_PREAMBLE; + else + sdata->flags &= ~IEEE80211_SDATA_SHORT_PREAMBLE; + ieee80211_erp_info_change_notify(dev, + IEEE80211_ERP_CHANGE_PREAMBLE); + } else { + ret = -ENOENT; } - local->stat_time = value; break; + case PRISM2_PARAM_SHORT_SLOT_TIME: if (value) local->hw.conf.flags |= IEEE80211_CONF_SHORT_SLOT_TIME; @@ -1086,35 +846,10 @@ static int ieee80211_ioctl_prism2_param( local->next_mode = value; break; - case PRISM2_PARAM_RADIO_ENABLED: - ret = ieee80211_ioctl_set_radio_enabled(dev, value); - break; - - case PRISM2_PARAM_ANTENNA_MODE: - local->hw.conf.antenna_mode = value; - if (ieee80211_hw_config(local)) - ret = -EINVAL; - break; - - case PRISM2_PARAM_STA_ANTENNA_SEL: - local->sta_antenna_sel = value; - break; - 
- case PRISM2_PARAM_TX_POWER_REDUCTION: - if (value < 0) - ret = -EINVAL; - else - local->hw.conf.tx_power_reduction = value; - break; - case PRISM2_PARAM_KEY_TX_RX_THRESHOLD: local->key_tx_rx_threshold = value; break; - case PRISM2_PARAM_DEFAULT_WEP_ONLY: - ret = ieee80211_ioctl_default_wep_only(local, value); - break; - case PRISM2_PARAM_WIFI_WME_NOACK_TEST: local->wifi_wme_noack_test = value; break; @@ -1127,8 +862,12 @@ static int ieee80211_ioctl_prism2_param( if (sdata->type != IEEE80211_IF_TYPE_STA && sdata->type != IEEE80211_IF_TYPE_IBSS) ret = -EINVAL; - else - sdata->u.sta.mixed_cell = !!value; + else { + if (value) + sdata->u.sta.flags |= IEEE80211_STA_MIXED_CELL; + else + sdata->u.sta.flags &= ~IEEE80211_STA_MIXED_CELL; + } break; case PRISM2_PARAM_HW_MODES: @@ -1138,21 +877,23 @@ static int ieee80211_ioctl_prism2_param( case PRISM2_PARAM_CREATE_IBSS: if (sdata->type != IEEE80211_IF_TYPE_IBSS) ret = -EINVAL; - else - sdata->u.sta.create_ibss = !!value; + else { + if (value) + sdata->u.sta.flags |= IEEE80211_STA_CREATE_IBSS; + else + sdata->u.sta.flags &= ~IEEE80211_STA_CREATE_IBSS; + } break; case PRISM2_PARAM_WMM_ENABLED: if (sdata->type != IEEE80211_IF_TYPE_STA && sdata->type != IEEE80211_IF_TYPE_IBSS) ret = -EINVAL; - else - sdata->u.sta.wmm_enabled = !!value; - break; - case PRISM2_PARAM_RADAR_DETECT: - local->hw.conf.radar_detect = value; - break; - case PRISM2_PARAM_SPECTRUM_MGMT: - local->hw.conf.spect_mgmt = value; + else { + if (value) + sdata->u.sta.flags |= IEEE80211_STA_WMM_ENABLED; + else + sdata->u.sta.flags &= ~IEEE80211_STA_WMM_ENABLED; + } break; default: ret = -EOPNOTSUPP; @@ -1180,16 +921,13 @@ static int ieee80211_ioctl_get_prism2_pa break; case PRISM2_PARAM_CTS_PROTECT_ERP_FRAMES: - *param = sdata->use_protection; + *param = !!(sdata->flags & IEEE80211_SDATA_USE_PROTECTION); break; case PRISM2_PARAM_PREAMBLE: - *param = local->short_preamble; + *param = !!(sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE); break; - case 
PRISM2_PARAM_STAT_TIME: - *param = local->stat_time; - break; case PRISM2_PARAM_SHORT_SLOT_TIME: *param = !!(local->hw.conf.flags & IEEE80211_CONF_SHORT_SLOT_TIME); break; @@ -1198,26 +936,10 @@ static int ieee80211_ioctl_get_prism2_pa *param = local->next_mode; break; - case PRISM2_PARAM_ANTENNA_MODE: - *param = local->hw.conf.antenna_mode; - break; - - case PRISM2_PARAM_STA_ANTENNA_SEL: - *param = local->sta_antenna_sel; - break; - - case PRISM2_PARAM_TX_POWER_REDUCTION: - *param = local->hw.conf.tx_power_reduction; - break; - case PRISM2_PARAM_KEY_TX_RX_THRESHOLD: *param = local->key_tx_rx_threshold; break; - case PRISM2_PARAM_DEFAULT_WEP_ONLY: - *param = local->default_wep_only; - break; - case PRISM2_PARAM_WIFI_WME_NOACK_TEST: *param = local->wifi_wme_noack_test; break; @@ -1234,7 +956,8 @@ static int ieee80211_ioctl_get_prism2_pa if (sdata->type != IEEE80211_IF_TYPE_IBSS) ret = -EINVAL; else - *param = !!sdata->u.sta.create_ibss; + *param = !!(sdata->u.sta.flags & + IEEE80211_STA_CREATE_IBSS); break; case PRISM2_PARAM_MIXED_CELL: @@ -1242,14 +965,17 @@ static int ieee80211_ioctl_get_prism2_pa sdata->type != IEEE80211_IF_TYPE_IBSS) ret = -EINVAL; else - *param = !!sdata->u.sta.mixed_cell; + *param = !!(sdata->u.sta.flags & + IEEE80211_STA_MIXED_CELL); break; + case PRISM2_PARAM_WMM_ENABLED: if (sdata->type != IEEE80211_IF_TYPE_STA && sdata->type != IEEE80211_IF_TYPE_IBSS) ret = -EINVAL; else - *param = !!sdata->u.sta.wmm_enabled; + *param = !!(sdata->u.sta.flags & + IEEE80211_STA_WMM_ENABLED); break; default: ret = -EOPNOTSUPP; @@ -1312,12 +1038,7 @@ static int ieee80211_ioctl_siwencode(str alg = ALG_NONE; else if (erq->length == 0) { /* No key data - just set the default TX key index */ - if (sdata->default_key != sdata->keys[idx]) { - ieee80211_debugfs_key_remove_default(sdata); - sdata->default_key = sdata->keys[idx]; - if (sdata->default_key) - ieee80211_debugfs_key_add_default(sdata); - } + ieee80211_set_default_key(sdata, idx); return 0; } @@ -1362,9 
+1083,9 @@ static int ieee80211_ioctl_giwencode(str return 0; } - memcpy(key, sdata->keys[idx]->key, - min((int)erq->length, sdata->keys[idx]->keylen)); - erq->length = sdata->keys[idx]->keylen; + memcpy(key, sdata->keys[idx]->conf.key, + min_t(int, erq->length, sdata->keys[idx]->conf.keylen)); + erq->length = sdata->keys[idx]->conf.keylen; erq->flags |= IW_ENCODE_ENABLED; return 0; @@ -1390,22 +1111,12 @@ static int ieee80211_ioctl_siwauth(struc ret = -EINVAL; else { /* - * TODO: sdata->u.sta.key_mgmt does not match with WE18 - * value completely; could consider modifying this to - * be closer to WE18. For now, this value is not really - * used for anything else than Privacy matching, so the - * current code here should be more or less OK. + * Key management was set by wpa_supplicant, + * we only need this to associate to a network + * that has privacy enabled regardless of not + * having a key. */ - if (data->value & IW_AUTH_KEY_MGMT_802_1X) { - sdata->u.sta.key_mgmt = - IEEE80211_KEY_MGMT_WPA_EAP; - } else if (data->value & IW_AUTH_KEY_MGMT_PSK) { - sdata->u.sta.key_mgmt = - IEEE80211_KEY_MGMT_WPA_PSK; - } else { - sdata->u.sta.key_mgmt = - IEEE80211_KEY_MGMT_NONE; - } + sdata->u.sta.key_management_enabled = !!data->value; } break; case IW_AUTH_80211_AUTH_ALG: @@ -1580,7 +1291,7 @@ static const iw_handler ieee80211_handle (iw_handler) ieee80211_ioctl_siwfrag, /* SIOCSIWFRAG */ (iw_handler) ieee80211_ioctl_giwfrag, /* SIOCGIWFRAG */ (iw_handler) NULL, /* SIOCSIWTXPOW */ - (iw_handler) NULL, /* SIOCGIWTXPOW */ + (iw_handler) ieee80211_ioctl_giwtxpower, /* SIOCGIWTXPOW */ (iw_handler) ieee80211_ioctl_siwretry, /* SIOCSIWRETRY */ (iw_handler) ieee80211_ioctl_giwretry, /* SIOCGIWRETRY */ (iw_handler) ieee80211_ioctl_siwencode, /* SIOCSIWENCODE */ diff -puN net/mac80211/ieee80211_key.h~git-net net/mac80211/ieee80211_key.h --- a/net/mac80211/ieee80211_key.h~git-net +++ a/net/mac80211/ieee80211_key.h @@ -11,7 +11,7 @@ #define IEEE80211_KEY_H #include -#include +#include 
#include #include @@ -41,11 +41,21 @@ #define NUM_RX_DATA_QUEUES 17 +struct ieee80211_local; +struct ieee80211_sub_if_data; +struct sta_info; + +#define KEY_FLAG_UPLOADED_TO_HARDWARE (1<<0) + struct ieee80211_key { - struct kref kref; + struct ieee80211_local *local; + struct ieee80211_sub_if_data *sdata; + struct sta_info *sta; + + struct list_head list; + + unsigned int flags; - int hw_key_idx; /* filled and used by low-level driver */ - ieee80211_key_alg alg; union { struct { /* last used TSC */ @@ -73,22 +83,16 @@ struct ieee80211_key { u8 rx_crypto_buf[6 * AES_BLOCK_LEN]; } ccmp; } u; - int tx_rx_count; /* number of times this key has been used */ - int keylen; - /* if the low level driver can provide hardware acceleration it should - * clear this flag */ - unsigned int force_sw_encrypt:1; - unsigned int default_tx_key:1; /* This key is the new default TX key - * (used only for broadcast keys). */ - s8 keyidx; /* WEP key index */ + /* number of times this key has been used */ + int tx_rx_count; #ifdef CONFIG_MAC80211_DEBUGFS struct { struct dentry *stalink; struct dentry *dir; struct dentry *keylen; - struct dentry *force_sw_encrypt; + struct dentry *flags; struct dentry *keyidx; struct dentry *hw_key_idx; struct dentry *tx_rx_count; @@ -97,10 +101,27 @@ struct ieee80211_key { struct dentry *rx_spec; struct dentry *replays; struct dentry *key; + struct dentry *ifindex; } debugfs; #endif - u8 key[0]; + /* + * key config, must be last because it contains key + * material as variable length member + */ + struct ieee80211_key_conf conf; }; +struct ieee80211_key *ieee80211_key_alloc(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + ieee80211_key_alg alg, + int idx, + size_t key_len, + const u8 *key_data); +void ieee80211_key_free(struct ieee80211_key *key); +void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx); +void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata); +void ieee80211_enable_keys(struct 
ieee80211_sub_if_data *sdata); +void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata); + #endif /* IEEE80211_KEY_H */ diff -puN net/mac80211/ieee80211_rate.c~git-net net/mac80211/ieee80211_rate.c --- a/net/mac80211/ieee80211_rate.c~git-net +++ a/net/mac80211/ieee80211_rate.c @@ -9,6 +9,7 @@ */ #include +#include #include "ieee80211_rate.h" #include "ieee80211_i.h" @@ -137,3 +138,44 @@ void rate_control_put(struct rate_contro { kref_put(&ref->kref, rate_control_release); } + +int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, + const char *name) +{ + struct rate_control_ref *ref, *old; + + ASSERT_RTNL(); + if (local->open_count || netif_running(local->mdev) || + (local->apdev && netif_running(local->apdev))) + return -EBUSY; + + ref = rate_control_alloc(name, local); + if (!ref) { + printk(KERN_WARNING "%s: Failed to select rate control " + "algorithm\n", local->mdev->name); + return -ENOENT; + } + + old = local->rate_ctrl; + local->rate_ctrl = ref; + if (old) { + rate_control_put(old); + sta_info_flush(local, NULL); + } + + printk(KERN_DEBUG "%s: Selected rate control " + "algorithm '%s'\n", local->mdev->name, + ref->ops->name); + + + return 0; +} + +void rate_control_deinitialize(struct ieee80211_local *local) +{ + struct rate_control_ref *ref; + + ref = local->rate_ctrl; + local->rate_ctrl = NULL; + rate_control_put(ref); +} diff -puN net/mac80211/ieee80211_rate.h~git-net net/mac80211/ieee80211_rate.h --- a/net/mac80211/ieee80211_rate.h~git-net +++ a/net/mac80211/ieee80211_rate.h @@ -141,4 +141,10 @@ static inline void rate_control_remove_s #endif } + +/* functions for rate control related to a device */ +int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, + const char *name); +void rate_control_deinitialize(struct ieee80211_local *local); + #endif /* IEEE80211_RATE_H */ diff -puN net/mac80211/ieee80211_sta.c~git-net net/mac80211/ieee80211_sta.c --- a/net/mac80211/ieee80211_sta.c~git-net +++ a/net/mac80211/ieee80211_sta.c @@ 
-234,7 +234,6 @@ static int ecw2cw(int ecw) return cw - 1; } - static void ieee80211_sta_wmm_params(struct net_device *dev, struct ieee80211_if_sta *ifsta, u8 *wmm_param, size_t wmm_param_len) @@ -318,8 +317,10 @@ static void ieee80211_handle_erp_ie(stru struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_sta *ifsta = &sdata->u.sta; int use_protection = (erp_value & WLAN_ERP_USE_PROTECTION) != 0; + int preamble_mode = (erp_value & WLAN_ERP_BARKER_PREAMBLE) != 0; + u8 changes = 0; - if (use_protection != sdata->use_protection) { + if (use_protection != !!(sdata->flags & IEEE80211_SDATA_USE_PROTECTION)) { if (net_ratelimit()) { printk(KERN_DEBUG "%s: CTS protection %s (BSSID=" MAC_FMT ")\n", @@ -327,8 +328,31 @@ static void ieee80211_handle_erp_ie(stru use_protection ? "enabled" : "disabled", MAC_ARG(ifsta->bssid)); } - sdata->use_protection = use_protection; + if (use_protection) + sdata->flags |= IEEE80211_SDATA_USE_PROTECTION; + else + sdata->flags &= ~IEEE80211_SDATA_USE_PROTECTION; + changes |= IEEE80211_ERP_CHANGE_PROTECTION; } + + if (preamble_mode != !(sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE)) { + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: switched to %s barker preamble" + " (BSSID=" MAC_FMT ")\n", + dev->name, + (preamble_mode == WLAN_ERP_PREAMBLE_SHORT) ? 
+ "short" : "long", + MAC_ARG(ifsta->bssid)); + } + if (preamble_mode) + sdata->flags &= ~IEEE80211_SDATA_SHORT_PREAMBLE; + else + sdata->flags |= IEEE80211_SDATA_SHORT_PREAMBLE; + changes |= IEEE80211_ERP_CHANGE_PREAMBLE; + } + + if (changes) + ieee80211_erp_info_change_notify(dev, changes); } @@ -344,7 +368,7 @@ static void ieee80211_sta_send_associnfo return; buf = kmalloc(50 + 2 * (ifsta->assocreq_ies_len + - ifsta->assocresp_ies_len), GFP_ATOMIC); + ifsta->assocresp_ies_len), GFP_KERNEL); if (!buf) return; @@ -384,19 +408,20 @@ static void ieee80211_sta_send_associnfo static void ieee80211_set_associated(struct net_device *dev, - struct ieee80211_if_sta *ifsta, int assoc) + struct ieee80211_if_sta *ifsta, + unsigned int assoc) { union iwreq_data wrqu; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (ifsta->associated == assoc) + if (!!(ifsta->flags & IEEE80211_STA_ASSOCIATED) == assoc) return; - ifsta->associated = assoc; - if (assoc) { struct ieee80211_sub_if_data *sdata; struct ieee80211_sta_bss *bss; + + ifsta->flags |= IEEE80211_STA_ASSOCIATED; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->type != IEEE80211_IF_TYPE_STA) return; @@ -409,13 +434,15 @@ static void ieee80211_set_associated(str } netif_carrier_on(dev); - ifsta->prev_bssid_set = 1; + ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET; memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN); memcpy(wrqu.ap_addr.sa_data, sdata->u.sta.bssid, ETH_ALEN); ieee80211_sta_send_associnfo(dev, ifsta); } else { + ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; + netif_carrier_off(dev); - sdata->use_protection = 0; + ieee80211_reset_erp_info(dev); memset(wrqu.ap_addr.sa_data, 0, ETH_ALEN); } wrqu.ap_addr.sa_family = ARPHRD_ETHER; @@ -447,8 +474,10 @@ static void ieee80211_sta_tx(struct net_ pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data)); pkt_data->ifindex = sdata->dev->ifindex; - pkt_data->mgmt_iface = (sdata->type == 
IEEE80211_IF_TYPE_MGMT); - pkt_data->do_not_encrypt = !encrypt; + if (sdata->type == IEEE80211_IF_TYPE_MGMT) + pkt_data->flags |= IEEE80211_TXPD_MGMT_IFACE; + if (!encrypt) + pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT; dev_queue_xmit(skb); } @@ -559,7 +588,7 @@ static void ieee80211_send_assoc(struct memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); - if (ifsta->prev_bssid_set) { + if (ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) { skb_put(skb, 10); mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, IEEE80211_STYPE_REASSOC_REQ); @@ -611,7 +640,7 @@ static void ieee80211_send_assoc(struct memcpy(pos, ifsta->extra_ie, ifsta->extra_ie_len); } - if (wmm && ifsta->wmm_enabled) { + if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { pos = skb_put(skb, 9); *pos++ = WLAN_EID_VENDOR_SPECIFIC; *pos++ = 7; /* len */ @@ -626,7 +655,7 @@ static void ieee80211_send_assoc(struct kfree(ifsta->assocreq_ies); ifsta->assocreq_ies_len = (skb->data + skb->len) - ies; - ifsta->assocreq_ies = kmalloc(ifsta->assocreq_ies_len, GFP_ATOMIC); + ifsta->assocreq_ies = kmalloc(ifsta->assocreq_ies_len, GFP_KERNEL); if (ifsta->assocreq_ies) memcpy(ifsta->assocreq_ies, ies, ifsta->assocreq_ies_len); @@ -698,8 +727,8 @@ static int ieee80211_privacy_mismatch(st struct ieee80211_sta_bss *bss; int res = 0; - if (!ifsta || ifsta->mixed_cell || - ifsta->key_mgmt != IEEE80211_KEY_MGMT_NONE) + if (!ifsta || (ifsta->flags & IEEE80211_STA_MIXED_CELL) || + ifsta->key_management_enabled) return 0; bss = ieee80211_rx_bss_get(dev, ifsta->bssid); @@ -767,22 +796,20 @@ static void ieee80211_associated(struct disassoc = 0; if (time_after(jiffies, sta->last_rx + IEEE80211_MONITORING_INTERVAL)) { - if (ifsta->probereq_poll) { + if (ifsta->flags & IEEE80211_STA_PROBEREQ_POLL) { printk(KERN_DEBUG "%s: No ProbeResp from " "current AP " MAC_FMT " - assume out of " "range\n", dev->name, MAC_ARG(ifsta->bssid)); disassoc = 1; - sta_info_free(sta, 0); - 
ifsta->probereq_poll = 0; - } else { + sta_info_free(sta); + } else ieee80211_send_probe_req(dev, ifsta->bssid, local->scan_ssid, local->scan_ssid_len); - ifsta->probereq_poll = 1; - } + ifsta->flags ^= IEEE80211_STA_PROBEREQ_POLL; } else { - ifsta->probereq_poll = 0; + ifsta->flags &= ~IEEE80211_STA_PROBEREQ_POLL; if (time_after(jiffies, ifsta->last_probe + IEEE80211_PROBE_INTERVAL)) { ifsta->last_probe = jiffies; @@ -876,7 +903,7 @@ static int ieee80211_sta_wep_configured( { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (!sdata || !sdata->default_key || - sdata->default_key->alg != ALG_WEP) + sdata->default_key->conf.alg != ALG_WEP) return 0; return 1; } @@ -886,7 +913,7 @@ static void ieee80211_auth_completed(str struct ieee80211_if_sta *ifsta) { printk(KERN_DEBUG "%s: authenticated\n", dev->name); - ifsta->authenticated = 1; + ifsta->flags |= IEEE80211_STA_AUTHENTICATED; ieee80211_associate(dev, ifsta); } @@ -1073,7 +1100,7 @@ static void ieee80211_rx_mgmt_deauth(str " (reason=%d)\n", dev->name, MAC_ARG(mgmt->sa), reason_code); - if (ifsta->authenticated) { + if (ifsta->flags & IEEE80211_STA_AUTHENTICATED) { printk(KERN_DEBUG "%s: deauthenticated\n", dev->name); } @@ -1086,7 +1113,7 @@ static void ieee80211_rx_mgmt_deauth(str } ieee80211_set_disassoc(dev, ifsta, 1); - ifsta->authenticated = 0; + ifsta->flags &= ~IEEE80211_STA_AUTHENTICATED; } @@ -1118,7 +1145,7 @@ static void ieee80211_rx_mgmt_disassoc(s " (reason=%d)\n", dev->name, MAC_ARG(mgmt->sa), reason_code); - if (ifsta->associated) + if (ifsta->flags & IEEE80211_STA_ASSOCIATED) printk(KERN_DEBUG "%s: disassociated\n", dev->name); if (ifsta->state == IEEE80211_ASSOCIATED) { @@ -1187,8 +1214,10 @@ static void ieee80211_rx_mgmt_assoc_resp if (status_code != WLAN_STATUS_SUCCESS) { printk(KERN_DEBUG "%s: AP denied association (code=%d)\n", dev->name, status_code); - if (status_code == WLAN_STATUS_REASSOC_NO_ASSOC) - ifsta->prev_bssid_set = 0; + /* if this was a reassociation, ensure 
we try a "full" + * association next time. This works around some broken APs + * which do not correctly reject reassociation requests. */ + ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET; return; } @@ -1224,7 +1253,7 @@ static void ieee80211_rx_mgmt_assoc_resp kfree(ifsta->assocresp_ies); ifsta->assocresp_ies_len = len - (pos - (u8 *) mgmt); - ifsta->assocresp_ies = kmalloc(ifsta->assocresp_ies_len, GFP_ATOMIC); + ifsta->assocresp_ies = kmalloc(ifsta->assocresp_ies_len, GFP_KERNEL); if (ifsta->assocresp_ies) memcpy(ifsta->assocresp_ies, pos, ifsta->assocresp_ies_len); @@ -1234,7 +1263,7 @@ static void ieee80211_rx_mgmt_assoc_resp sta = sta_info_get(local, ifsta->bssid); if (!sta) { struct ieee80211_sta_bss *bss; - sta = sta_info_add(local, dev, ifsta->bssid, GFP_ATOMIC); + sta = sta_info_add(local, dev, ifsta->bssid, GFP_KERNEL); if (!sta) { printk(KERN_DEBUG "%s: failed to add STA entry for the" " AP\n", dev->name); @@ -1250,8 +1279,7 @@ static void ieee80211_rx_mgmt_assoc_resp } sta->dev = dev; - sta->flags |= WLAN_STA_AUTH | WLAN_STA_ASSOC; - sta->assoc_ap = 1; + sta->flags |= WLAN_STA_AUTH | WLAN_STA_ASSOC | WLAN_STA_ASSOC_AP; rates = 0; mode = local->oper_hw_mode; @@ -1275,7 +1303,7 @@ static void ieee80211_rx_mgmt_assoc_resp rate_control_rate_init(sta, local); - if (elems.wmm_param && ifsta->wmm_enabled) { + if (elems.wmm_param && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { sta->flags |= WLAN_STA_WME; ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param, elems.wmm_param_len); @@ -1672,7 +1700,7 @@ static void ieee80211_rx_mgmt_beacon(str return; ifsta = &sdata->u.sta; - if (!ifsta->associated || + if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED) || memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) return; @@ -1688,7 +1716,7 @@ static void ieee80211_rx_mgmt_beacon(str if (elems.erp_info && elems.erp_info_len >= 1) ieee80211_handle_erp_ie(dev, elems.erp_info[0]); - if (elems.wmm_param && ifsta->wmm_enabled) { + if (elems.wmm_param && (ifsta->flags & 
IEEE80211_STA_WMM_ENABLED)) { ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param, elems.wmm_param_len); } @@ -1751,7 +1779,7 @@ static void ieee80211_rx_mgmt_probe_req( } /* Reply with ProbeResp */ - skb = skb_copy(ifsta->probe_resp, GFP_ATOMIC); + skb = skb_copy(ifsta->probe_resp, GFP_KERNEL); if (!skb) return; @@ -1890,7 +1918,7 @@ static int ieee80211_sta_active_ibss(str int active = 0; struct sta_info *sta; - spin_lock_bh(&local->sta_lock); + read_lock_bh(&local->sta_lock); list_for_each_entry(sta, &local->sta_list, list) { if (sta->dev == dev && time_after(sta->last_rx + IEEE80211_IBSS_MERGE_INTERVAL, @@ -1899,7 +1927,7 @@ static int ieee80211_sta_active_ibss(str break; } } - spin_unlock_bh(&local->sta_lock); + read_unlock_bh(&local->sta_lock); return active; } @@ -1909,16 +1937,24 @@ static void ieee80211_sta_expire(struct { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct sta_info *sta, *tmp; + LIST_HEAD(tmp_list); - spin_lock_bh(&local->sta_lock); + write_lock_bh(&local->sta_lock); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) if (time_after(jiffies, sta->last_rx + IEEE80211_IBSS_INACTIVITY_LIMIT)) { printk(KERN_DEBUG "%s: expiring inactive STA " MAC_FMT "\n", dev->name, MAC_ARG(sta->addr)); - sta_info_free(sta, 1); + __sta_info_get(sta); + sta_info_remove(sta); + list_add(&sta->list, &tmp_list); } - spin_unlock_bh(&local->sta_lock); + write_unlock_bh(&local->sta_lock); + + list_for_each_entry_safe(sta, tmp, &tmp_list, list) { + sta_info_free(sta); + sta_info_put(sta); + } } @@ -2047,7 +2083,8 @@ static void ieee80211_sta_reset_auth(str printk(KERN_DEBUG "%s: Initial auth_alg=%d\n", dev->name, ifsta->auth_alg); ifsta->auth_transaction = -1; - ifsta->associated = ifsta->auth_tries = ifsta->assoc_tries = 0; + ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; + ifsta->auth_tries = ifsta->assoc_tries = 0; netif_carrier_off(dev); } @@ -2061,8 +2098,10 @@ void ieee80211_sta_req_auth(struct net_d if (sdata->type != 
IEEE80211_IF_TYPE_STA) return; - if ((ifsta->bssid_set || ifsta->auto_bssid_sel) && - (ifsta->ssid_set || ifsta->auto_ssid_sel)) { + if ((ifsta->flags & (IEEE80211_STA_BSSID_SET | + IEEE80211_STA_AUTO_BSSID_SEL)) && + (ifsta->flags & (IEEE80211_STA_SSID_SET | + IEEE80211_STA_AUTO_SSID_SEL))) { set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request); queue_work(local->hw.workqueue, &ifsta->work); } @@ -2076,7 +2115,7 @@ static int ieee80211_sta_match_ssid(stru if (!memcmp(ifsta->ssid, ssid, ssid_len)) return 1; - if (ifsta->auto_bssid_sel) + if (ifsta->flags & IEEE80211_STA_AUTO_BSSID_SEL) return 0; hidden_ssid = 1; @@ -2105,8 +2144,8 @@ static int ieee80211_sta_config_auth(str struct ieee80211_sta_bss *bss, *selected = NULL; int top_rssi = 0, freq; - if (!ifsta->auto_channel_sel && !ifsta->auto_bssid_sel && - !ifsta->auto_ssid_sel) { + if (!(ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL | + IEEE80211_STA_AUTO_BSSID_SEL | IEEE80211_STA_AUTO_CHANNEL_SEL))) { ifsta->state = IEEE80211_AUTHENTICATE; ieee80211_sta_reset_auth(dev, ifsta); return 0; @@ -2122,14 +2161,15 @@ static int ieee80211_sta_config_auth(str !!sdata->default_key) continue; - if (!ifsta->auto_channel_sel && bss->freq != freq) + if (!(ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) && + bss->freq != freq) continue; - if (!ifsta->auto_bssid_sel && + if (!(ifsta->flags & IEEE80211_STA_AUTO_BSSID_SEL) && memcmp(bss->bssid, ifsta->bssid, ETH_ALEN)) continue; - if (!ifsta->auto_ssid_sel && + if (!(ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) && !ieee80211_sta_match_ssid(ifsta, bss->ssid, bss->ssid_len)) continue; @@ -2144,7 +2184,7 @@ static int ieee80211_sta_config_auth(str if (selected) { ieee80211_set_channel(local, -1, selected->freq); - if (!ifsta->ssid_set) + if (!(ifsta->flags & IEEE80211_STA_SSID_SET)) ieee80211_sta_set_ssid(dev, selected->ssid, selected->ssid_len); ieee80211_sta_set_bssid(dev, selected->bssid); @@ -2154,7 +2194,7 @@ static int ieee80211_sta_config_auth(str return 0; } else { if (ifsta->state 
!= IEEE80211_AUTHENTICATE) { - if (ifsta->auto_ssid_sel) + if (ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) ieee80211_sta_start_scan(dev, NULL, 0); else ieee80211_sta_start_scan(dev, ifsta->ssid, @@ -2271,8 +2311,9 @@ static int ieee80211_sta_join_ibss(struc "for IBSS beacon\n", dev->name); break; } - control.tx_rate = (local->short_preamble && - (rate->flags & IEEE80211_RATE_PREAMBLE2)) ? + control.tx_rate = + ((sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE) && + (rate->flags & IEEE80211_RATE_PREAMBLE2)) ? rate->val2 : rate->val; control.antenna_sel_tx = local->hw.conf.antenna_sel_tx; control.power_level = local->hw.conf.power_level; @@ -2451,10 +2492,10 @@ static int ieee80211_sta_find_ibss(struc if (time_after(jiffies, ifsta->ibss_join_req + IEEE80211_IBSS_JOIN_TIMEOUT)) { - if (ifsta->create_ibss && + if ((ifsta->flags & IEEE80211_STA_CREATE_IBSS) && local->oper_channel->flag & IEEE80211_CHAN_W_IBSS) return ieee80211_sta_create_ibss(dev, ifsta); - if (ifsta->create_ibss) { + if (ifsta->flags & IEEE80211_STA_CREATE_IBSS) { printk(KERN_DEBUG "%s: IBSS not allowed on the" " configured channel %d (%d MHz)\n", dev->name, local->hw.conf.channel, @@ -2515,13 +2556,17 @@ int ieee80211_sta_set_ssid(struct net_de ifsta = &sdata->u.sta; if (ifsta->ssid_len != len || memcmp(ifsta->ssid, ssid, len) != 0) - ifsta->prev_bssid_set = 0; + ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET; memcpy(ifsta->ssid, ssid, len); memset(ifsta->ssid + len, 0, IEEE80211_MAX_SSID_LEN - len); ifsta->ssid_len = len; - ifsta->ssid_set = len ? 
1 : 0; - if (sdata->type == IEEE80211_IF_TYPE_IBSS && !ifsta->bssid_set) { + if (len) + ifsta->flags |= IEEE80211_STA_SSID_SET; + else + ifsta->flags &= ~IEEE80211_STA_SSID_SET; + if (sdata->type == IEEE80211_IF_TYPE_IBSS && + !(ifsta->flags & IEEE80211_STA_BSSID_SET)) { ifsta->ibss_join_req = jiffies; ifsta->state = IEEE80211_IBSS_SEARCH; return ieee80211_sta_find_ibss(dev, ifsta); @@ -2559,10 +2604,11 @@ int ieee80211_sta_set_bssid(struct net_d } } - if (!is_valid_ether_addr(bssid)) - ifsta->bssid_set = 0; + if (is_valid_ether_addr(bssid)) + ifsta->flags |= IEEE80211_STA_BSSID_SET; else - ifsta->bssid_set = 1; + ifsta->flags &= ~IEEE80211_STA_BSSID_SET; + return 0; } @@ -2629,7 +2675,7 @@ void ieee80211_scan_completed(struct iee continue; if (sdata->type == IEEE80211_IF_TYPE_STA) { - if (sdata->u.sta.associated) + if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) ieee80211_send_nullfunc(local, sdata, 0); ieee80211_sta_timer((unsigned long)sdata); } @@ -2641,7 +2687,7 @@ void ieee80211_scan_completed(struct iee sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->type == IEEE80211_IF_TYPE_IBSS) { struct ieee80211_if_sta *ifsta = &sdata->u.sta; - if (!ifsta->bssid_set || + if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) || (!ifsta->state == IEEE80211_IBSS_JOINED && !ieee80211_sta_active_ibss(dev))) ieee80211_sta_find_ibss(dev, ifsta); @@ -2783,7 +2829,7 @@ static int ieee80211_sta_start_scan(stru netif_stop_queue(sdata->dev); if (sdata->type == IEEE80211_IF_TYPE_STA && - sdata->u.sta.associated) + (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED)) ieee80211_send_nullfunc(local, sdata, 1); } read_unlock(&local->sub_if_lock); @@ -3096,7 +3142,7 @@ int ieee80211_sta_disassociate(struct ne if (sdata->type != IEEE80211_IF_TYPE_STA) return -EINVAL; - if (!ifsta->associated) + if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED)) return -1; ieee80211_send_disassoc(dev, ifsta, reason); diff -puN /dev/null net/mac80211/key.c --- /dev/null +++ a/net/mac80211/key.c @@ -0,0 +1,275 @@ +/* + 
* Copyright 2002-2005, Instant802 Networks, Inc. + * Copyright 2005-2006, Devicescape Software, Inc. + * Copyright 2006-2007 Jiri Benc + * Copyright 2007 Johannes Berg + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include "ieee80211_i.h" +#include "debugfs_key.h" +#include "aes_ccm.h" + + +/* + * Key handling basics + * + * Key handling in mac80211 is done based on per-interface (sub_if_data) + * keys and per-station keys. Since each station belongs to an interface, + * each station key also belongs to that interface. + * + * Hardware acceleration is done on a best-effort basis, for each key + * that is eligible the hardware is asked to enable that key but if + * it cannot do that the key is simply kept for software encryption. + * There is currently no way of knowing this except by looking into + * debugfs. + * + * All operations here are called under RTNL so no extra locking is + * required. + */ + +static const u8 bcast_addr[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; +static const u8 zero_addr[ETH_ALEN]; + +static const u8 *get_mac_for_key(struct ieee80211_key *key) +{ + const u8 *addr = bcast_addr; + + /* + * If we're an AP we won't ever receive frames with a non-WEP + * group key so we tell the driver that by using the zero MAC + * address to indicate a transmit-only key. 
+ */ + if (key->conf.alg != ALG_WEP && + (key->sdata->type == IEEE80211_IF_TYPE_AP || + key->sdata->type == IEEE80211_IF_TYPE_VLAN)) + addr = zero_addr; + + if (key->sta) + addr = key->sta->addr; + + return addr; +} + +static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) +{ + const u8 *addr; + int ret; + + if (!key->local->ops->set_key) + return; + + addr = get_mac_for_key(key); + + ret = key->local->ops->set_key(local_to_hw(key->local), SET_KEY, + key->sdata->dev->dev_addr, addr, + &key->conf); + + WARN_ON(!ret && (key->conf.hw_key_idx == HW_KEY_IDX_INVALID)); + + if (!ret) + key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; + + if (ret && ret != -ENOSPC && ret != -EOPNOTSUPP) + printk(KERN_ERR "mac80211-%s: failed to set key " + "(%d, " MAC_FMT ") to hardware (%d)\n", + wiphy_name(key->local->hw.wiphy), + key->conf.keyidx, MAC_ARG(addr), ret); +} + +static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) +{ + const u8 *addr; + int ret; + + if (!key->local->ops->set_key) + return; + + if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) + return; + + addr = get_mac_for_key(key); + + ret = key->local->ops->set_key(local_to_hw(key->local), DISABLE_KEY, + key->sdata->dev->dev_addr, addr, + &key->conf); + + if (ret) + printk(KERN_ERR "mac80211-%s: failed to remove key " + "(%d, " MAC_FMT ") from hardware (%d)\n", + wiphy_name(key->local->hw.wiphy), + key->conf.keyidx, MAC_ARG(addr), ret); + + key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; + key->conf.hw_key_idx = HW_KEY_IDX_INVALID; +} + +struct ieee80211_key *ieee80211_key_alloc(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + ieee80211_key_alg alg, + int idx, + size_t key_len, + const u8 *key_data) +{ + struct ieee80211_key *key; + + BUG_ON(alg == ALG_NONE); + + key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL); + if (!key) + return NULL; + + /* + * Default to software encryption; we'll later upload the + * key to the hardware if possible. 
+ */ + key->conf.hw_key_idx = HW_KEY_IDX_INVALID; + key->conf.flags = 0; + key->flags = 0; + + key->conf.alg = alg; + key->conf.keyidx = idx; + key->conf.keylen = key_len; + memcpy(key->conf.key, key_data, key_len); + + key->local = sdata->local; + key->sdata = sdata; + key->sta = sta; + + if (alg == ALG_CCMP) { + /* + * Initialize AES key state here as an optimization so that + * it does not need to be initialized for every packet. + */ + key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt(key_data); + if (!key->u.ccmp.tfm) { + ieee80211_key_free(key); + return NULL; + } + } + + ieee80211_debugfs_key_add(key->local, key); + + if (sta) { + ieee80211_debugfs_key_sta_link(key, sta); + sta->key = key; + /* + * some hardware cannot handle TKIP with QoS, so + * we indicate whether QoS could be in use. + */ + if (sta->flags & WLAN_STA_WME) + key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA; + } else { + if (sdata->type == IEEE80211_IF_TYPE_STA) { + struct sta_info *ap; + + /* same here, the AP could be using QoS */ + ap = sta_info_get(key->local, key->sdata->u.sta.bssid); + if (ap) { + if (ap->flags & WLAN_STA_WME) + key->conf.flags |= + IEEE80211_KEY_FLAG_WMM_STA; + sta_info_put(ap); + } + } + + if (idx >= 0 && idx < NUM_DEFAULT_KEYS) { + if (!sdata->keys[idx]) + sdata->keys[idx] = key; + else + WARN_ON(1); + } else + WARN_ON(1); + } + + list_add(&key->list, &sdata->key_list); + + if (netif_running(key->sdata->dev)) + ieee80211_key_enable_hw_accel(key); + + return key; +} + +void ieee80211_key_free(struct ieee80211_key *key) +{ + if (!key) + return; + + ieee80211_key_disable_hw_accel(key); + + if (key->sta) { + key->sta->key = NULL; + } else { + if (key->sdata->default_key == key) + ieee80211_set_default_key(key->sdata, -1); + if (key->conf.keyidx >= 0 && + key->conf.keyidx < NUM_DEFAULT_KEYS) + key->sdata->keys[key->conf.keyidx] = NULL; + else + WARN_ON(1); + } + + if (key->conf.alg == ALG_CCMP) + ieee80211_aes_key_free(key->u.ccmp.tfm); + ieee80211_debugfs_key_remove(key); 
+ + list_del(&key->list); + + kfree(key); +} + +void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx) +{ + struct ieee80211_key *key = NULL; + + if (idx >= 0 && idx < NUM_DEFAULT_KEYS) + key = sdata->keys[idx]; + + if (sdata->default_key != key) { + ieee80211_debugfs_key_remove_default(sdata); + + sdata->default_key = key; + + if (sdata->default_key) + ieee80211_debugfs_key_add_default(sdata); + + if (sdata->local->ops->set_key_idx) + sdata->local->ops->set_key_idx( + local_to_hw(sdata->local), idx); + } +} + +void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_key *key, *tmp; + + list_for_each_entry_safe(key, tmp, &sdata->key_list, list) + ieee80211_key_free(key); +} + +void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_key *key; + + WARN_ON(!netif_running(sdata->dev)); + if (!netif_running(sdata->dev)) + return; + + list_for_each_entry(key, &sdata->key_list, list) + ieee80211_key_enable_hw_accel(key); +} + +void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_key *key; + + list_for_each_entry(key, &sdata->key_list, list) + ieee80211_key_disable_hw_accel(key); +} diff -puN net/mac80211/rc80211_simple.c~git-net net/mac80211/rc80211_simple.c --- a/net/mac80211/rc80211_simple.c~git-net +++ a/net/mac80211/rc80211_simple.c @@ -147,14 +147,6 @@ static void rate_control_simple_tx_statu srctrl = sta->rate_ctrl_priv; srctrl->tx_num_xmit++; if (status->excessive_retries) { - sta->antenna_sel_tx = sta->antenna_sel_tx == 1 ? 2 : 1; - sta->antenna_sel_rx = sta->antenna_sel_rx == 1 ? 
2 : 1; - if (local->sta_antenna_sel == STA_ANTENNA_SEL_SW_CTRL_DEBUG) { - printk(KERN_DEBUG "%s: " MAC_FMT " TX antenna --> %d " - "RX antenna --> %d (@%lu)\n", - dev->name, MAC_ARG(hdr->addr1), - sta->antenna_sel_tx, sta->antenna_sel_rx, jiffies); - } srctrl->tx_num_failures++; sta->tx_retry_failed++; sta->tx_num_consecutive_failures++; diff -puN /dev/null net/mac80211/rx.c --- /dev/null +++ a/net/mac80211/rx.c @@ -0,0 +1,1489 @@ +/* + * Copyright 2002-2005, Instant802 Networks, Inc. + * Copyright 2005-2006, Devicescape Software, Inc. + * Copyright 2006-2007 Jiri Benc + * Copyright 2007 Johannes Berg + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#include "ieee80211_i.h" +#include "ieee80211_led.h" +#include "ieee80211_common.h" +#include "wep.h" +#include "wpa.h" +#include "tkip.h" +#include "wme.h" + +/* pre-rx handlers + * + * these don't have dev/sdata fields in the rx data + * The sta value should also not be used because it may + * be NULL even though a STA (in IBSS mode) will be added. + */ + +static ieee80211_txrx_result +ieee80211_rx_h_parse_qos(struct ieee80211_txrx_data *rx) +{ + u8 *data = rx->skb->data; + int tid; + + /* does the frame have a qos control field? 
*/ + if (WLAN_FC_IS_QOS_DATA(rx->fc)) { + u8 *qc = data + ieee80211_get_hdrlen(rx->fc) - QOS_CONTROL_LEN; + /* frame has qos control */ + tid = qc[0] & QOS_CONTROL_TID_MASK; + } else { + if (unlikely((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT)) { + /* Separate TID for management frames */ + tid = NUM_RX_DATA_QUEUES - 1; + } else { + /* no qos control present */ + tid = 0; /* 802.1d - Best Effort */ + } + } + + I802_DEBUG_INC(rx->local->wme_rx_queue[tid]); + /* only a debug counter, sta might not be assigned properly yet */ + if (rx->sta) + I802_DEBUG_INC(rx->sta->wme_rx_queue[tid]); + + rx->u.rx.queue = tid; + /* Set skb->priority to 1d tag if highest order bit of TID is not set. + * For now, set skb->priority to 0 for other cases. */ + rx->skb->priority = (tid > 7) ? 0 : tid; + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_rx_h_load_stats(struct ieee80211_txrx_data *rx) +{ + struct ieee80211_local *local = rx->local; + struct sk_buff *skb = rx->skb; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + u32 load = 0, hdrtime; + struct ieee80211_rate *rate; + struct ieee80211_hw_mode *mode = local->hw.conf.mode; + int i; + + /* Estimate total channel use caused by this frame */ + + if (unlikely(mode->num_rates < 0)) + return TXRX_CONTINUE; + + rate = &mode->rates[0]; + for (i = 0; i < mode->num_rates; i++) { + if (mode->rates[i].val == rx->u.rx.status->rate) { + rate = &mode->rates[i]; + break; + } + } + + /* 1 bit at 1 Mbit/s takes 1 usec; in channel_use values, + * 1 usec = 1/8 * (1080 / 10) = 13.5 */ + + if (mode->mode == MODE_IEEE80211A || + mode->mode == MODE_ATHEROS_TURBO || + mode->mode == MODE_ATHEROS_TURBOG || + (mode->mode == MODE_IEEE80211G && + rate->flags & IEEE80211_RATE_ERP)) + hdrtime = CHAN_UTIL_HDR_SHORT; + else + hdrtime = CHAN_UTIL_HDR_LONG; + + load = hdrtime; + if (!is_multicast_ether_addr(hdr->addr1)) + load += hdrtime; + + load += skb->len * rate->rate_inv; + + /* Divide channel_use by 8 to 
avoid wrapping around the counter */ + load >>= CHAN_UTIL_SHIFT; + local->channel_use_raw += load; + rx->u.rx.load = load; + + return TXRX_CONTINUE; +} + +ieee80211_rx_handler ieee80211_rx_pre_handlers[] = +{ + ieee80211_rx_h_parse_qos, + ieee80211_rx_h_load_stats, + NULL +}; + +/* rx handlers */ + +static ieee80211_txrx_result +ieee80211_rx_h_if_stats(struct ieee80211_txrx_data *rx) +{ + if (rx->sta) + rx->sta->channel_use_raw += rx->u.rx.load; + rx->sdata->channel_use_raw += rx->u.rx.load; + return TXRX_CONTINUE; +} + +static void +ieee80211_rx_monitor(struct net_device *dev, struct sk_buff *skb, + struct ieee80211_rx_status *status) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_sub_if_data *sdata; + struct ieee80211_rate *rate; + struct ieee80211_rtap_hdr { + struct ieee80211_radiotap_header hdr; + u8 flags; + u8 rate; + __le16 chan_freq; + __le16 chan_flags; + u8 antsignal; + } __attribute__ ((packed)) *rthdr; + + skb->dev = dev; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (status->flag & RX_FLAG_RADIOTAP) + goto out; + + if (skb_headroom(skb) < sizeof(*rthdr)) { + I802_DEBUG_INC(local->rx_expand_skb_head); + if (pskb_expand_head(skb, sizeof(*rthdr), 0, GFP_ATOMIC)) { + dev_kfree_skb(skb); + return; + } + } + + rthdr = (struct ieee80211_rtap_hdr *) skb_push(skb, sizeof(*rthdr)); + memset(rthdr, 0, sizeof(*rthdr)); + rthdr->hdr.it_len = cpu_to_le16(sizeof(*rthdr)); + rthdr->hdr.it_present = + cpu_to_le32((1 << IEEE80211_RADIOTAP_FLAGS) | + (1 << IEEE80211_RADIOTAP_RATE) | + (1 << IEEE80211_RADIOTAP_CHANNEL) | + (1 << IEEE80211_RADIOTAP_DB_ANTSIGNAL)); + rthdr->flags = local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS ? + IEEE80211_RADIOTAP_F_FCS : 0; + rate = ieee80211_get_rate(local, status->phymode, status->rate); + if (rate) + rthdr->rate = rate->rate / 5; + rthdr->chan_freq = cpu_to_le16(status->freq); + rthdr->chan_flags = + status->phymode == MODE_IEEE80211A ? 
+ cpu_to_le16(IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ) : + cpu_to_le16(IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ); + rthdr->antsignal = status->ssi; + + out: + sdata->stats.rx_packets++; + sdata->stats.rx_bytes += skb->len; + + skb_set_mac_header(skb, 0); + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->pkt_type = PACKET_OTHERHOST; + skb->protocol = htons(ETH_P_802_2); + memset(skb->cb, 0, sizeof(skb->cb)); + netif_rx(skb); +} + +static ieee80211_txrx_result +ieee80211_rx_h_monitor(struct ieee80211_txrx_data *rx) +{ + if (rx->sdata->type == IEEE80211_IF_TYPE_MNTR) { + ieee80211_rx_monitor(rx->dev, rx->skb, rx->u.rx.status); + return TXRX_QUEUED; + } + + if (rx->u.rx.status->flag & RX_FLAG_RADIOTAP) + skb_pull(rx->skb, ieee80211_get_radiotap_len(rx->skb->data)); + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_rx_h_passive_scan(struct ieee80211_txrx_data *rx) +{ + struct ieee80211_local *local = rx->local; + struct sk_buff *skb = rx->skb; + + if (unlikely(local->sta_scanning != 0)) { + ieee80211_sta_rx_scan(rx->dev, skb, rx->u.rx.status); + return TXRX_QUEUED; + } + + if (unlikely(rx->flags & IEEE80211_TXRXD_RXIN_SCAN)) { + /* scanning finished during invoking of handlers */ + I802_DEBUG_INC(local->rx_handlers_drop_passive_scan); + return TXRX_DROP; + } + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_rx_h_check(struct ieee80211_txrx_data *rx) +{ + struct ieee80211_hdr *hdr; + hdr = (struct ieee80211_hdr *) rx->skb->data; + + /* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 
9.2.9) */ + if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) { + if (unlikely(rx->fc & IEEE80211_FCTL_RETRY && + rx->sta->last_seq_ctrl[rx->u.rx.queue] == + hdr->seq_ctrl)) { + if (rx->flags & IEEE80211_TXRXD_RXRA_MATCH) { + rx->local->dot11FrameDuplicateCount++; + rx->sta->num_duplicates++; + } + return TXRX_DROP; + } else + rx->sta->last_seq_ctrl[rx->u.rx.queue] = hdr->seq_ctrl; + } + + if ((rx->local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) && + rx->skb->len > FCS_LEN) + skb_trim(rx->skb, rx->skb->len - FCS_LEN); + + if (unlikely(rx->skb->len < 16)) { + I802_DEBUG_INC(rx->local->rx_handlers_drop_short); + return TXRX_DROP; + } + + if (!(rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) + rx->skb->pkt_type = PACKET_OTHERHOST; + else if (compare_ether_addr(rx->dev->dev_addr, hdr->addr1) == 0) + rx->skb->pkt_type = PACKET_HOST; + else if (is_multicast_ether_addr(hdr->addr1)) { + if (is_broadcast_ether_addr(hdr->addr1)) + rx->skb->pkt_type = PACKET_BROADCAST; + else + rx->skb->pkt_type = PACKET_MULTICAST; + } else + rx->skb->pkt_type = PACKET_OTHERHOST; + + /* Drop disallowed frame classes based on STA auth/assoc state; + * IEEE 802.11, Chap 5.5. + * + * 80211.o does filtering only based on association state, i.e., it + * drops Class 3 frames from not associated stations. hostapd sends + * deauth/disassoc frames when needed. In addition, hostapd is + * responsible for filtering on both auth and assoc states. + */ + if (unlikely(((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA || + ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL && + (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL)) && + rx->sdata->type != IEEE80211_IF_TYPE_IBSS && + (!rx->sta || !(rx->sta->flags & WLAN_STA_ASSOC)))) { + if ((!(rx->fc & IEEE80211_FCTL_FROMDS) && + !(rx->fc & IEEE80211_FCTL_TODS) && + (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) + || !(rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) { + /* Drop IBSS frames and frames for other hosts + * silently. 
*/ + return TXRX_DROP; + } + + if (!rx->local->apdev) + return TXRX_DROP; + + ieee80211_rx_mgmt(rx->local, rx->skb, rx->u.rx.status, + ieee80211_msg_sta_not_assoc); + return TXRX_QUEUED; + } + + return TXRX_CONTINUE; +} + + +static ieee80211_txrx_result +ieee80211_rx_h_load_key(struct ieee80211_txrx_data *rx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; + int keyidx; + int hdrlen; + + /* + * Key selection 101 + * + * There are three types of keys: + * - GTK (group keys) + * - PTK (pairwise keys) + * - STK (station-to-station pairwise keys) + * + * When selecting a key, we have to distinguish between multicast + * (including broadcast) and unicast frames, the latter can only + * use PTKs and STKs while the former always use GTKs. Unless, of + * course, actual WEP keys ("pre-RSNA") are used, then unicast + * frames can also use key indizes like GTKs. Hence, if we don't + * have a PTK/STK we check the key index for a WEP key. + * + * Note that in a regular BSS, multicast frames are sent by the + * AP only, associated stations unicast the frame to the AP first + * which then multicasts it on their behalf. + * + * There is also a slight problem in IBSS mode: GTKs are negotiated + * with each station, that is something we don't currently handle. + * The spec seems to expect that one negotiates the same key with + * every station but there's no such requirement; VLANs could be + * possible. + */ + + if (!(rx->fc & IEEE80211_FCTL_PROTECTED)) + return TXRX_CONTINUE; + + /* + * No point in finding a key if the frame is neither + * addressed to us nor a multicast frame. + */ + if (!(rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) + return TXRX_CONTINUE; + + if (!is_multicast_ether_addr(hdr->addr1) && rx->sta && rx->sta->key) { + rx->key = rx->sta->key; + } else { + /* + * The device doesn't give us the IV so we won't be + * able to look up the key. 
That's ok though, we + * don't need to decrypt the frame, we just won't + * be able to keep statistics accurate. + * Except for key threshold notifications, should + * we somehow allow the driver to tell us which key + * the hardware used if this flag is set? + */ + if (!(rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV)) + return TXRX_CONTINUE; + + hdrlen = ieee80211_get_hdrlen(rx->fc); + + if (rx->skb->len < 8 + hdrlen) + return TXRX_DROP; /* TODO: count this? */ + + /* + * no need to call ieee80211_wep_get_keyidx, + * it verifies a bunch of things we've done already + */ + keyidx = rx->skb->data[hdrlen + 3] >> 6; + + rx->key = rx->sdata->keys[keyidx]; + + /* + * RSNA-protected unicast frames should always be sent with + * pairwise or station-to-station keys, but for WEP we allow + * using a key index as well. + */ + if (rx->key && rx->key->conf.alg != ALG_WEP && + !is_multicast_ether_addr(hdr->addr1)) + rx->key = NULL; + } + + if (rx->key) { + rx->key->tx_rx_count++; + if (unlikely(rx->local->key_tx_rx_threshold && + rx->key->tx_rx_count > + rx->local->key_tx_rx_threshold)) { + ieee80211_key_threshold_notify(rx->dev, rx->key, + rx->sta); + } + } + + return TXRX_CONTINUE; +} + +static void ap_sta_ps_start(struct net_device *dev, struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata; + sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev); + + if (sdata->bss) + atomic_inc(&sdata->bss->num_sta_ps); + sta->flags |= WLAN_STA_PS; + sta->pspoll = 0; +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + printk(KERN_DEBUG "%s: STA " MAC_FMT " aid %d enters power " + "save mode\n", dev->name, MAC_ARG(sta->addr), sta->aid); +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ +} + +static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct sk_buff *skb; + int sent = 0; + struct ieee80211_sub_if_data *sdata; + struct ieee80211_tx_packet_data *pkt_data; + + sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev); + if 
(sdata->bss) + atomic_dec(&sdata->bss->num_sta_ps); + sta->flags &= ~(WLAN_STA_PS | WLAN_STA_TIM); + sta->pspoll = 0; + if (!skb_queue_empty(&sta->ps_tx_buf)) { + if (local->ops->set_tim) + local->ops->set_tim(local_to_hw(local), sta->aid, 0); + if (sdata->bss) + bss_tim_clear(local, sdata->bss, sta->aid); + } +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + printk(KERN_DEBUG "%s: STA " MAC_FMT " aid %d exits power " + "save mode\n", dev->name, MAC_ARG(sta->addr), sta->aid); +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + /* Send all buffered frames to the station */ + while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) { + pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; + sent++; + pkt_data->flags |= IEEE80211_TXPD_REQUEUE; + dev_queue_xmit(skb); + } + while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) { + pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; + local->total_ps_buffered--; + sent++; +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + printk(KERN_DEBUG "%s: STA " MAC_FMT " aid %d send PS frame " + "since STA not sleeping anymore\n", dev->name, + MAC_ARG(sta->addr), sta->aid); +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + pkt_data->flags |= IEEE80211_TXPD_REQUEUE; + dev_queue_xmit(skb); + } + + return sent; +} + +static ieee80211_txrx_result +ieee80211_rx_h_sta_process(struct ieee80211_txrx_data *rx) +{ + struct sta_info *sta = rx->sta; + struct net_device *dev = rx->dev; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; + + if (!sta) + return TXRX_CONTINUE; + + /* Update last_rx only for IBSS packets which are for the current + * BSSID to avoid keeping the current IBSS network alive in cases where + * other STAs are using different BSSID. 
*/ + if (rx->sdata->type == IEEE80211_IF_TYPE_IBSS) { + u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len); + if (compare_ether_addr(bssid, rx->sdata->u.sta.bssid) == 0) + sta->last_rx = jiffies; + } else + if (!is_multicast_ether_addr(hdr->addr1) || + rx->sdata->type == IEEE80211_IF_TYPE_STA) { + /* Update last_rx only for unicast frames in order to prevent + * the Probe Request frames (the only broadcast frames from a + * STA in infrastructure mode) from keeping a connection alive. + */ + sta->last_rx = jiffies; + } + + if (!(rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) + return TXRX_CONTINUE; + + sta->rx_fragments++; + sta->rx_bytes += rx->skb->len; + sta->last_rssi = rx->u.rx.status->ssi; + sta->last_signal = rx->u.rx.status->signal; + sta->last_noise = rx->u.rx.status->noise; + + if (!(rx->fc & IEEE80211_FCTL_MOREFRAGS)) { + /* Change STA power saving mode only in the end of a frame + * exchange sequence */ + if ((sta->flags & WLAN_STA_PS) && !(rx->fc & IEEE80211_FCTL_PM)) + rx->u.rx.sent_ps_buffered += ap_sta_ps_end(dev, sta); + else if (!(sta->flags & WLAN_STA_PS) && + (rx->fc & IEEE80211_FCTL_PM)) + ap_sta_ps_start(dev, sta); + } + + /* Drop data::nullfunc frames silently, since they are used only to + * control station power saving mode. */ + if ((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && + (rx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_NULLFUNC) { + I802_DEBUG_INC(rx->local->rx_handlers_drop_nullfunc); + /* Update counter and free packet here to avoid counting this + * as a dropped packed. 
*/ + sta->rx_packets++; + dev_kfree_skb(rx->skb); + return TXRX_QUEUED; + } + + return TXRX_CONTINUE; +} /* ieee80211_rx_h_sta_process */ + +static ieee80211_txrx_result +ieee80211_rx_h_wep_weak_iv_detection(struct ieee80211_txrx_data *rx) +{ + if (!rx->sta || !(rx->fc & IEEE80211_FCTL_PROTECTED) || + (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA || + !rx->key || rx->key->conf.alg != ALG_WEP || + !(rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) + return TXRX_CONTINUE; + + /* Check for weak IVs, if hwaccel did not remove IV from the frame */ + if ((rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) || + !(rx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) + if (ieee80211_wep_is_weak_iv(rx->skb, rx->key)) + rx->sta->wep_weak_iv_count++; + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_rx_h_wep_decrypt(struct ieee80211_txrx_data *rx) +{ + if ((rx->key && rx->key->conf.alg != ALG_WEP) || + !(rx->fc & IEEE80211_FCTL_PROTECTED) || + ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA && + ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || + (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH))) + return TXRX_CONTINUE; + + if (!rx->key) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: RX WEP frame, but no key set\n", + rx->dev->name); + return TXRX_DROP; + } + + if (!(rx->u.rx.status->flag & RX_FLAG_DECRYPTED) || + !(rx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) { + if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key)) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: RX WEP frame, decrypt " + "failed\n", rx->dev->name); + return TXRX_DROP; + } + } else if (rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) { + ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key); + /* remove ICV */ + skb_trim(rx->skb, rx->skb->len - 4); + } + + return TXRX_CONTINUE; +} + +static inline struct ieee80211_fragment_entry * +ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata, + unsigned int frag, unsigned int seq, int rx_queue, + 
struct sk_buff **skb) +{ + struct ieee80211_fragment_entry *entry; + int idx; + + idx = sdata->fragment_next; + entry = &sdata->fragments[sdata->fragment_next++]; + if (sdata->fragment_next >= IEEE80211_FRAGMENT_MAX) + sdata->fragment_next = 0; + + if (!skb_queue_empty(&entry->skb_list)) { +#ifdef CONFIG_MAC80211_DEBUG + struct ieee80211_hdr *hdr = + (struct ieee80211_hdr *) entry->skb_list.next->data; + printk(KERN_DEBUG "%s: RX reassembly removed oldest " + "fragment entry (idx=%d age=%lu seq=%d last_frag=%d " + "addr1=" MAC_FMT " addr2=" MAC_FMT "\n", + sdata->dev->name, idx, + jiffies - entry->first_frag_time, entry->seq, + entry->last_frag, MAC_ARG(hdr->addr1), + MAC_ARG(hdr->addr2)); +#endif /* CONFIG_MAC80211_DEBUG */ + __skb_queue_purge(&entry->skb_list); + } + + __skb_queue_tail(&entry->skb_list, *skb); /* no need for locking */ + *skb = NULL; + entry->first_frag_time = jiffies; + entry->seq = seq; + entry->rx_queue = rx_queue; + entry->last_frag = frag; + entry->ccmp = 0; + entry->extra_len = 0; + + return entry; +} + +static inline struct ieee80211_fragment_entry * +ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata, + u16 fc, unsigned int frag, unsigned int seq, + int rx_queue, struct ieee80211_hdr *hdr) +{ + struct ieee80211_fragment_entry *entry; + int i, idx; + + idx = sdata->fragment_next; + for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) { + struct ieee80211_hdr *f_hdr; + u16 f_fc; + + idx--; + if (idx < 0) + idx = IEEE80211_FRAGMENT_MAX - 1; + + entry = &sdata->fragments[idx]; + if (skb_queue_empty(&entry->skb_list) || entry->seq != seq || + entry->rx_queue != rx_queue || + entry->last_frag + 1 != frag) + continue; + + f_hdr = (struct ieee80211_hdr *) entry->skb_list.next->data; + f_fc = le16_to_cpu(f_hdr->frame_control); + + if ((fc & IEEE80211_FCTL_FTYPE) != (f_fc & IEEE80211_FCTL_FTYPE) || + compare_ether_addr(hdr->addr1, f_hdr->addr1) != 0 || + compare_ether_addr(hdr->addr2, f_hdr->addr2) != 0) + continue; + + if 
(entry->first_frag_time + 2 * HZ < jiffies) { + __skb_queue_purge(&entry->skb_list); + continue; + } + return entry; + } + + return NULL; +} + +static ieee80211_txrx_result +ieee80211_rx_h_defragment(struct ieee80211_txrx_data *rx) +{ + struct ieee80211_hdr *hdr; + u16 sc; + unsigned int frag, seq; + struct ieee80211_fragment_entry *entry; + struct sk_buff *skb; + + hdr = (struct ieee80211_hdr *) rx->skb->data; + sc = le16_to_cpu(hdr->seq_ctrl); + frag = sc & IEEE80211_SCTL_FRAG; + + if (likely((!(rx->fc & IEEE80211_FCTL_MOREFRAGS) && frag == 0) || + (rx->skb)->len < 24 || + is_multicast_ether_addr(hdr->addr1))) { + /* not fragmented */ + goto out; + } + I802_DEBUG_INC(rx->local->rx_handlers_fragments); + + seq = (sc & IEEE80211_SCTL_SEQ) >> 4; + + if (frag == 0) { + /* This is the first fragment of a new frame. */ + entry = ieee80211_reassemble_add(rx->sdata, frag, seq, + rx->u.rx.queue, &(rx->skb)); + if (rx->key && rx->key->conf.alg == ALG_CCMP && + (rx->fc & IEEE80211_FCTL_PROTECTED)) { + /* Store CCMP PN so that we can verify that the next + * fragment has a sequential PN value. */ + entry->ccmp = 1; + memcpy(entry->last_pn, + rx->key->u.ccmp.rx_pn[rx->u.rx.queue], + CCMP_PN_LEN); + } + return TXRX_QUEUED; + } + + /* This is a fragment for a frame that should already be pending in + * fragment cache. Add this fragment to the end of the pending entry. + */ + entry = ieee80211_reassemble_find(rx->sdata, rx->fc, frag, seq, + rx->u.rx.queue, hdr); + if (!entry) { + I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag); + return TXRX_DROP; + } + + /* Verify that MPDUs within one MSDU have sequential PN values. 
+ * (IEEE 802.11i, 8.3.3.4.5) */ + if (entry->ccmp) { + int i; + u8 pn[CCMP_PN_LEN], *rpn; + if (!rx->key || rx->key->conf.alg != ALG_CCMP) + return TXRX_DROP; + memcpy(pn, entry->last_pn, CCMP_PN_LEN); + for (i = CCMP_PN_LEN - 1; i >= 0; i--) { + pn[i]++; + if (pn[i]) + break; + } + rpn = rx->key->u.ccmp.rx_pn[rx->u.rx.queue]; + if (memcmp(pn, rpn, CCMP_PN_LEN) != 0) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: defrag: CCMP PN not " + "sequential A2=" MAC_FMT + " PN=%02x%02x%02x%02x%02x%02x " + "(expected %02x%02x%02x%02x%02x%02x)\n", + rx->dev->name, MAC_ARG(hdr->addr2), + rpn[0], rpn[1], rpn[2], rpn[3], rpn[4], + rpn[5], pn[0], pn[1], pn[2], pn[3], + pn[4], pn[5]); + return TXRX_DROP; + } + memcpy(entry->last_pn, pn, CCMP_PN_LEN); + } + + skb_pull(rx->skb, ieee80211_get_hdrlen(rx->fc)); + __skb_queue_tail(&entry->skb_list, rx->skb); + entry->last_frag = frag; + entry->extra_len += rx->skb->len; + if (rx->fc & IEEE80211_FCTL_MOREFRAGS) { + rx->skb = NULL; + return TXRX_QUEUED; + } + + rx->skb = __skb_dequeue(&entry->skb_list); + if (skb_tailroom(rx->skb) < entry->extra_len) { + I802_DEBUG_INC(rx->local->rx_expand_skb_head2); + if (unlikely(pskb_expand_head(rx->skb, 0, entry->extra_len, + GFP_ATOMIC))) { + I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag); + __skb_queue_purge(&entry->skb_list); + return TXRX_DROP; + } + } + while ((skb = __skb_dequeue(&entry->skb_list))) { + memcpy(skb_put(rx->skb, skb->len), skb->data, skb->len); + dev_kfree_skb(skb); + } + + /* Complete frame has been reassembled - process it now */ + rx->flags |= IEEE80211_TXRXD_FRAGMENTED; + + out: + if (rx->sta) + rx->sta->rx_packets++; + if (is_multicast_ether_addr(hdr->addr1)) + rx->local->dot11MulticastReceivedFrameCount++; + else + ieee80211_led_rx(rx->local); + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_rx_h_ps_poll(struct ieee80211_txrx_data *rx) +{ + struct sk_buff *skb; + int no_pending_pkts; + + if (likely(!rx->sta || + (rx->fc & IEEE80211_FCTL_FTYPE) 
!= IEEE80211_FTYPE_CTL || + (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PSPOLL || + !(rx->flags & IEEE80211_TXRXD_RXRA_MATCH))) + return TXRX_CONTINUE; + + skb = skb_dequeue(&rx->sta->tx_filtered); + if (!skb) { + skb = skb_dequeue(&rx->sta->ps_tx_buf); + if (skb) + rx->local->total_ps_buffered--; + } + no_pending_pkts = skb_queue_empty(&rx->sta->tx_filtered) && + skb_queue_empty(&rx->sta->ps_tx_buf); + + if (skb) { + struct ieee80211_hdr *hdr = + (struct ieee80211_hdr *) skb->data; + + /* tell TX path to send one frame even though the STA may + * still remain is PS mode after this frame exchange */ + rx->sta->pspoll = 1; + +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + printk(KERN_DEBUG "STA " MAC_FMT " aid %d: PS Poll (entries " + "after %d)\n", + MAC_ARG(rx->sta->addr), rx->sta->aid, + skb_queue_len(&rx->sta->ps_tx_buf)); +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + + /* Use MoreData flag to indicate whether there are more + * buffered frames for this STA */ + if (no_pending_pkts) { + hdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREDATA); + rx->sta->flags &= ~WLAN_STA_TIM; + } else + hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA); + + dev_queue_xmit(skb); + + if (no_pending_pkts) { + if (rx->local->ops->set_tim) + rx->local->ops->set_tim(local_to_hw(rx->local), + rx->sta->aid, 0); + if (rx->sdata->bss) + bss_tim_clear(rx->local, rx->sdata->bss, rx->sta->aid); + } +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + } else if (!rx->u.rx.sent_ps_buffered) { + printk(KERN_DEBUG "%s: STA " MAC_FMT " sent PS Poll even " + "though there is no buffered frames for it\n", + rx->dev->name, MAC_ARG(rx->sta->addr)); +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + + } + + /* Free PS Poll skb here instead of returning TXRX_DROP that would + * count as an dropped frame. 
*/ + dev_kfree_skb(rx->skb); + + return TXRX_QUEUED; +} + +static ieee80211_txrx_result +ieee80211_rx_h_remove_qos_control(struct ieee80211_txrx_data *rx) +{ + u16 fc = rx->fc; + u8 *data = rx->skb->data; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) data; + + if (!WLAN_FC_IS_QOS_DATA(fc)) + return TXRX_CONTINUE; + + /* remove the qos control field, update frame type and meta-data */ + memmove(data + 2, data, ieee80211_get_hdrlen(fc) - 2); + hdr = (struct ieee80211_hdr *) skb_pull(rx->skb, 2); + /* change frame type to non QOS */ + rx->fc = fc &= ~IEEE80211_STYPE_QOS_DATA; + hdr->frame_control = cpu_to_le16(fc); + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_rx_h_802_1x_pae(struct ieee80211_txrx_data *rx) +{ + if (rx->sdata->eapol && ieee80211_is_eapol(rx->skb) && + rx->sdata->type != IEEE80211_IF_TYPE_STA && + (rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) { + /* Pass both encrypted and unencrypted EAPOL frames to user + * space for processing. */ + if (!rx->local->apdev) + return TXRX_DROP; + ieee80211_rx_mgmt(rx->local, rx->skb, rx->u.rx.status, + ieee80211_msg_normal); + return TXRX_QUEUED; + } + + if (unlikely(rx->sdata->ieee802_1x && + (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && + (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC && + (!rx->sta || !(rx->sta->flags & WLAN_STA_AUTHORIZED)) && + !ieee80211_is_eapol(rx->skb))) { +#ifdef CONFIG_MAC80211_DEBUG + struct ieee80211_hdr *hdr = + (struct ieee80211_hdr *) rx->skb->data; + printk(KERN_DEBUG "%s: dropped frame from " MAC_FMT + " (unauthorized port)\n", rx->dev->name, + MAC_ARG(hdr->addr2)); +#endif /* CONFIG_MAC80211_DEBUG */ + return TXRX_DROP; + } + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_rx_h_drop_unencrypted(struct ieee80211_txrx_data *rx) +{ + /* + * Pass through unencrypted frames if the hardware might have + * decrypted them already without telling us, but that can only + * be true if we either didn't find a key or 
the found key is + * uploaded to the hardware. + */ + if ((rx->local->hw.flags & IEEE80211_HW_DEVICE_HIDES_WEP) && + (!rx->key || (rx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))) + return TXRX_CONTINUE; + + /* Drop unencrypted frames if key is set. */ + if (unlikely(!(rx->fc & IEEE80211_FCTL_PROTECTED) && + (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && + (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC && + (rx->key || rx->sdata->drop_unencrypted) && + (rx->sdata->eapol == 0 || + !ieee80211_is_eapol(rx->skb)))) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: RX non-WEP frame, but expected " + "encryption\n", rx->dev->name); + return TXRX_DROP; + } + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_rx_h_data(struct ieee80211_txrx_data *rx) +{ + struct net_device *dev = rx->dev; + struct ieee80211_local *local = rx->local; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; + u16 fc, hdrlen, ethertype; + u8 *payload; + u8 dst[ETH_ALEN]; + u8 src[ETH_ALEN]; + struct sk_buff *skb = rx->skb, *skb2; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + fc = rx->fc; + if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA)) + return TXRX_CONTINUE; + + if (unlikely(!WLAN_FC_DATA_PRESENT(fc))) + return TXRX_DROP; + + hdrlen = ieee80211_get_hdrlen(fc); + + /* convert IEEE 802.11 header + possible LLC headers into Ethernet + * header + * IEEE 802.11 address fields: + * ToDS FromDS Addr1 Addr2 Addr3 Addr4 + * 0 0 DA SA BSSID n/a + * 0 1 DA BSSID SA n/a + * 1 0 BSSID SA DA n/a + * 1 1 RA TA DA SA + */ + + switch (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { + case IEEE80211_FCTL_TODS: + /* BSSID SA DA */ + memcpy(dst, hdr->addr3, ETH_ALEN); + memcpy(src, hdr->addr2, ETH_ALEN); + + if (unlikely(sdata->type != IEEE80211_IF_TYPE_AP && + sdata->type != IEEE80211_IF_TYPE_VLAN)) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: dropped ToDS frame " + "(BSSID=" MAC_FMT + " SA=" 
MAC_FMT + " DA=" MAC_FMT ")\n", + dev->name, + MAC_ARG(hdr->addr1), + MAC_ARG(hdr->addr2), + MAC_ARG(hdr->addr3)); + return TXRX_DROP; + } + break; + case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): + /* RA TA DA SA */ + memcpy(dst, hdr->addr3, ETH_ALEN); + memcpy(src, hdr->addr4, ETH_ALEN); + + if (unlikely(sdata->type != IEEE80211_IF_TYPE_WDS)) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: dropped FromDS&ToDS " + "frame (RA=" MAC_FMT + " TA=" MAC_FMT " DA=" MAC_FMT + " SA=" MAC_FMT ")\n", + rx->dev->name, + MAC_ARG(hdr->addr1), + MAC_ARG(hdr->addr2), + MAC_ARG(hdr->addr3), + MAC_ARG(hdr->addr4)); + return TXRX_DROP; + } + break; + case IEEE80211_FCTL_FROMDS: + /* DA BSSID SA */ + memcpy(dst, hdr->addr1, ETH_ALEN); + memcpy(src, hdr->addr3, ETH_ALEN); + + if (sdata->type != IEEE80211_IF_TYPE_STA || + (is_multicast_ether_addr(dst) && + !compare_ether_addr(src, dev->dev_addr))) + return TXRX_DROP; + break; + case 0: + /* DA SA BSSID */ + memcpy(dst, hdr->addr1, ETH_ALEN); + memcpy(src, hdr->addr2, ETH_ALEN); + + if (sdata->type != IEEE80211_IF_TYPE_IBSS) { + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: dropped IBSS frame (DA=" + MAC_FMT " SA=" MAC_FMT " BSSID=" MAC_FMT + ")\n", + dev->name, MAC_ARG(hdr->addr1), + MAC_ARG(hdr->addr2), + MAC_ARG(hdr->addr3)); + } + return TXRX_DROP; + } + break; + } + + payload = skb->data + hdrlen; + + if (unlikely(skb->len - hdrlen < 8)) { + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: RX too short data frame " + "payload\n", dev->name); + } + return TXRX_DROP; + } + + ethertype = (payload[6] << 8) | payload[7]; + + if (likely((compare_ether_addr(payload, rfc1042_header) == 0 && + ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) || + compare_ether_addr(payload, bridge_tunnel_header) == 0)) { + /* remove RFC1042 or Bridge-Tunnel encapsulation and + * replace EtherType */ + skb_pull(skb, hdrlen + 6); + memcpy(skb_push(skb, ETH_ALEN), src, ETH_ALEN); + memcpy(skb_push(skb, ETH_ALEN), dst, ETH_ALEN); + } else { + struct 
ethhdr *ehdr; + __be16 len; + skb_pull(skb, hdrlen); + len = htons(skb->len); + ehdr = (struct ethhdr *) skb_push(skb, sizeof(struct ethhdr)); + memcpy(ehdr->h_dest, dst, ETH_ALEN); + memcpy(ehdr->h_source, src, ETH_ALEN); + ehdr->h_proto = len; + } + skb->dev = dev; + + skb2 = NULL; + + sdata->stats.rx_packets++; + sdata->stats.rx_bytes += skb->len; + + if (local->bridge_packets && (sdata->type == IEEE80211_IF_TYPE_AP + || sdata->type == IEEE80211_IF_TYPE_VLAN) && + (rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) { + if (is_multicast_ether_addr(skb->data)) { + /* send multicast frames both to higher layers in + * local net stack and back to the wireless media */ + skb2 = skb_copy(skb, GFP_ATOMIC); + if (!skb2 && net_ratelimit()) + printk(KERN_DEBUG "%s: failed to clone " + "multicast frame\n", dev->name); + } else { + struct sta_info *dsta; + dsta = sta_info_get(local, skb->data); + if (dsta && !dsta->dev) { + if (net_ratelimit()) + printk(KERN_DEBUG "Station with null " + "dev structure!\n"); + } else if (dsta && dsta->dev == dev) { + /* Destination station is associated to this + * AP, so send the frame directly to it and + * do not pass the frame to local net stack. 
+ */ + skb2 = skb; + skb = NULL; + } + if (dsta) + sta_info_put(dsta); + } + } + + if (skb) { + /* deliver to local stack */ + skb->protocol = eth_type_trans(skb, dev); + memset(skb->cb, 0, sizeof(skb->cb)); + netif_rx(skb); + } + + if (skb2) { + /* send to wireless media */ + skb2->protocol = __constant_htons(ETH_P_802_3); + skb_set_network_header(skb2, 0); + skb_set_mac_header(skb2, 0); + dev_queue_xmit(skb2); + } + + return TXRX_QUEUED; +} + +static ieee80211_txrx_result +ieee80211_rx_h_mgmt(struct ieee80211_txrx_data *rx) +{ + struct ieee80211_sub_if_data *sdata; + + if (!(rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) + return TXRX_DROP; + + sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); + if ((sdata->type == IEEE80211_IF_TYPE_STA || + sdata->type == IEEE80211_IF_TYPE_IBSS) && + !rx->local->user_space_mlme) { + ieee80211_sta_rx_mgmt(rx->dev, rx->skb, rx->u.rx.status); + } else { + /* Management frames are sent to hostapd for processing */ + if (!rx->local->apdev) + return TXRX_DROP; + ieee80211_rx_mgmt(rx->local, rx->skb, rx->u.rx.status, + ieee80211_msg_normal); + } + return TXRX_QUEUED; +} + +static inline ieee80211_txrx_result __ieee80211_invoke_rx_handlers( + struct ieee80211_local *local, + ieee80211_rx_handler *handlers, + struct ieee80211_txrx_data *rx, + struct sta_info *sta) +{ + ieee80211_rx_handler *handler; + ieee80211_txrx_result res = TXRX_DROP; + + for (handler = handlers; *handler != NULL; handler++) { + res = (*handler)(rx); + + switch (res) { + case TXRX_CONTINUE: + continue; + case TXRX_DROP: + I802_DEBUG_INC(local->rx_handlers_drop); + if (sta) + sta->rx_dropped++; + break; + case TXRX_QUEUED: + I802_DEBUG_INC(local->rx_handlers_queued); + break; + } + break; + } + + if (res == TXRX_DROP) + dev_kfree_skb(rx->skb); + return res; +} + +static inline void ieee80211_invoke_rx_handlers(struct ieee80211_local *local, + ieee80211_rx_handler *handlers, + struct ieee80211_txrx_data *rx, + struct sta_info *sta) +{ + if (__ieee80211_invoke_rx_handlers(local, 
handlers, rx, sta) == + TXRX_CONTINUE) + dev_kfree_skb(rx->skb); +} + +static void ieee80211_rx_michael_mic_report(struct net_device *dev, + struct ieee80211_hdr *hdr, + struct sta_info *sta, + struct ieee80211_txrx_data *rx) +{ + int keyidx, hdrlen; + + hdrlen = ieee80211_get_hdrlen_from_skb(rx->skb); + if (rx->skb->len >= hdrlen + 4) + keyidx = rx->skb->data[hdrlen + 3] >> 6; + else + keyidx = -1; + + /* TODO: verify that this is not triggered by fragmented + * frames (hw does not verify MIC for them). */ + if (net_ratelimit()) + printk(KERN_DEBUG "%s: TKIP hwaccel reported Michael MIC " + "failure from " MAC_FMT " to " MAC_FMT " keyidx=%d\n", + dev->name, MAC_ARG(hdr->addr2), MAC_ARG(hdr->addr1), + keyidx); + + if (!sta) { + /* Some hardware versions seem to generate incorrect + * Michael MIC reports; ignore them to avoid triggering + * countermeasures. */ + if (net_ratelimit()) + printk(KERN_DEBUG "%s: ignored spurious Michael MIC " + "error for unknown address " MAC_FMT "\n", + dev->name, MAC_ARG(hdr->addr2)); + goto ignore; + } + + if (!(rx->fc & IEEE80211_FCTL_PROTECTED)) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: ignored spurious Michael MIC " + "error for a frame with no ISWEP flag (src " + MAC_FMT ")\n", dev->name, MAC_ARG(hdr->addr2)); + goto ignore; + } + + if ((rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) && + rx->sdata->type == IEEE80211_IF_TYPE_AP && keyidx) { + /* AP with Pairwise keys support should never receive Michael + * MIC errors for non-zero keyidx because these are reserved + * for group keys and only the AP is sending real multicast + * frames in BSS. 
*/ + if (net_ratelimit()) + printk(KERN_DEBUG "%s: ignored Michael MIC error for " + "a frame with non-zero keyidx (%d)" + " (src " MAC_FMT ")\n", dev->name, keyidx, + MAC_ARG(hdr->addr2)); + goto ignore; + } + + if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA && + ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || + (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH)) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: ignored spurious Michael MIC " + "error for a frame that cannot be encrypted " + "(fc=0x%04x) (src " MAC_FMT ")\n", + dev->name, rx->fc, MAC_ARG(hdr->addr2)); + goto ignore; + } + + /* TODO: consider verifying the MIC error report with software + * implementation if we get too many spurious reports from the + * hardware. */ + + mac80211_ev_michael_mic_failure(rx->dev, keyidx, hdr); + ignore: + dev_kfree_skb(rx->skb); + rx->skb = NULL; +} + +ieee80211_rx_handler ieee80211_rx_handlers[] = +{ + ieee80211_rx_h_if_stats, + ieee80211_rx_h_monitor, + ieee80211_rx_h_passive_scan, + ieee80211_rx_h_check, + ieee80211_rx_h_load_key, + ieee80211_rx_h_sta_process, + ieee80211_rx_h_ccmp_decrypt, + ieee80211_rx_h_tkip_decrypt, + ieee80211_rx_h_wep_weak_iv_detection, + ieee80211_rx_h_wep_decrypt, + ieee80211_rx_h_defragment, + ieee80211_rx_h_ps_poll, + ieee80211_rx_h_michael_mic_verify, + /* this must be after decryption - so header is counted in MPDU mic + * must be before pae and data, so QOS_DATA format frames + * are not passed to user space by these functions + */ + ieee80211_rx_h_remove_qos_control, + ieee80211_rx_h_802_1x_pae, + ieee80211_rx_h_drop_unencrypted, + ieee80211_rx_h_data, + ieee80211_rx_h_mgmt, + NULL +}; + +/* main receive path */ + +static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, + u8 *bssid, struct ieee80211_txrx_data *rx, + struct ieee80211_hdr *hdr) +{ + int multicast = is_multicast_ether_addr(hdr->addr1); + + switch (sdata->type) { + case IEEE80211_IF_TYPE_STA: + if (!bssid) + return 0; + if 
(!ieee80211_bssid_match(bssid, sdata->u.sta.bssid)) { + if (!(rx->flags & IEEE80211_TXRXD_RXIN_SCAN)) + return 0; + rx->flags &= ~IEEE80211_TXRXD_RXRA_MATCH; + } else if (!multicast && + compare_ether_addr(sdata->dev->dev_addr, + hdr->addr1) != 0) { + if (!(sdata->flags & IEEE80211_SDATA_PROMISC)) + return 0; + rx->flags &= ~IEEE80211_TXRXD_RXRA_MATCH; + } + break; + case IEEE80211_IF_TYPE_IBSS: + if (!bssid) + return 0; + if (!ieee80211_bssid_match(bssid, sdata->u.sta.bssid)) { + if (!(rx->flags & IEEE80211_TXRXD_RXIN_SCAN)) + return 0; + rx->flags &= ~IEEE80211_TXRXD_RXRA_MATCH; + } else if (!multicast && + compare_ether_addr(sdata->dev->dev_addr, + hdr->addr1) != 0) { + if (!(sdata->flags & IEEE80211_SDATA_PROMISC)) + return 0; + rx->flags &= ~IEEE80211_TXRXD_RXRA_MATCH; + } else if (!rx->sta) + rx->sta = ieee80211_ibss_add_sta(sdata->dev, rx->skb, + bssid, hdr->addr2); + break; + case IEEE80211_IF_TYPE_AP: + if (!bssid) { + if (compare_ether_addr(sdata->dev->dev_addr, + hdr->addr1)) + return 0; + } else if (!ieee80211_bssid_match(bssid, + sdata->dev->dev_addr)) { + if (!(rx->flags & IEEE80211_TXRXD_RXIN_SCAN)) + return 0; + rx->flags &= ~IEEE80211_TXRXD_RXRA_MATCH; + } + if (sdata->dev == sdata->local->mdev && + !(rx->flags & IEEE80211_TXRXD_RXIN_SCAN)) + /* do not receive anything via + * master device when not scanning */ + return 0; + break; + case IEEE80211_IF_TYPE_WDS: + if (bssid || + (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA) + return 0; + if (compare_ether_addr(sdata->u.wds.remote_addr, hdr->addr2)) + return 0; + break; + } + + return 1; +} + +/* + * This is the receive path handler. It is called by a low level driver when an + * 802.11 MPDU is received from the hardware. 
+ */ +void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, + struct ieee80211_rx_status *status) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata; + struct sta_info *sta; + struct ieee80211_hdr *hdr; + struct ieee80211_txrx_data rx; + u16 type; + int radiotap_len = 0, prepres; + struct ieee80211_sub_if_data *prev = NULL; + struct sk_buff *skb_new; + u8 *bssid; + + if (status->flag & RX_FLAG_RADIOTAP) { + radiotap_len = ieee80211_get_radiotap_len(skb->data); + skb_pull(skb, radiotap_len); + } + + hdr = (struct ieee80211_hdr *) skb->data; + memset(&rx, 0, sizeof(rx)); + rx.skb = skb; + rx.local = local; + + rx.u.rx.status = status; + rx.fc = skb->len >= 2 ? le16_to_cpu(hdr->frame_control) : 0; + type = rx.fc & IEEE80211_FCTL_FTYPE; + if (type == IEEE80211_FTYPE_DATA || type == IEEE80211_FTYPE_MGMT) + local->dot11ReceivedFragmentCount++; + + if (skb->len >= 16) { + sta = rx.sta = sta_info_get(local, hdr->addr2); + if (sta) { + rx.dev = rx.sta->dev; + rx.sdata = IEEE80211_DEV_TO_SUB_IF(rx.dev); + } + } else + sta = rx.sta = NULL; + + if ((status->flag & RX_FLAG_MMIC_ERROR)) { + ieee80211_rx_michael_mic_report(local->mdev, hdr, sta, &rx); + goto end; + } + + if (unlikely(local->sta_scanning)) + rx.flags |= IEEE80211_TXRXD_RXIN_SCAN; + + if (__ieee80211_invoke_rx_handlers(local, local->rx_pre_handlers, &rx, + sta) != TXRX_CONTINUE) + goto end; + skb = rx.skb; + + skb_push(skb, radiotap_len); + if (sta && !(sta->flags & (WLAN_STA_WDS | WLAN_STA_ASSOC_AP)) && + !local->iff_promiscs && !is_multicast_ether_addr(hdr->addr1)) { + rx.flags |= IEEE80211_TXRXD_RXRA_MATCH; + ieee80211_invoke_rx_handlers(local, local->rx_handlers, &rx, + rx.sta); + sta_info_put(sta); + return; + } + + bssid = ieee80211_get_bssid(hdr, skb->len - radiotap_len); + + read_lock(&local->sub_if_lock); + list_for_each_entry(sdata, &local->sub_if_list, list) { + rx.flags |= IEEE80211_TXRXD_RXRA_MATCH; + + if (!netif_running(sdata->dev)) + continue; + + 
prepres = prepare_for_handlers(sdata, bssid, &rx, hdr); + /* prepare_for_handlers can change sta */ + sta = rx.sta; + + if (!prepres) + continue; + + /* + * frame is destined for this interface, but if it's not + * also for the previous one we handle that after the + * loop to avoid copying the SKB once too much + */ + + if (!prev) { + prev = sdata; + continue; + } + + /* + * frame was destined for the previous interface + * so invoke RX handlers for it + */ + + skb_new = skb_copy(skb, GFP_ATOMIC); + if (!skb_new) { + if (net_ratelimit()) + printk(KERN_DEBUG "%s: failed to copy " + "multicast frame for %s", + local->mdev->name, prev->dev->name); + continue; + } + rx.skb = skb_new; + rx.dev = prev->dev; + rx.sdata = prev; + ieee80211_invoke_rx_handlers(local, local->rx_handlers, + &rx, sta); + prev = sdata; + } + if (prev) { + rx.skb = skb; + rx.dev = prev->dev; + rx.sdata = prev; + ieee80211_invoke_rx_handlers(local, local->rx_handlers, + &rx, sta); + } else + dev_kfree_skb(skb); + read_unlock(&local->sub_if_lock); + + end: + if (sta) + sta_info_put(sta); +} +EXPORT_SYMBOL(__ieee80211_rx); + +/* This is a version of the rx handler that can be called from hard irq + * context. 
Post the skb on the queue and schedule the tasklet */ +void ieee80211_rx_irqsafe(struct ieee80211_hw *hw, struct sk_buff *skb, + struct ieee80211_rx_status *status) +{ + struct ieee80211_local *local = hw_to_local(hw); + + BUILD_BUG_ON(sizeof(struct ieee80211_rx_status) > sizeof(skb->cb)); + + skb->dev = local->mdev; + /* copy status into skb->cb for use by tasklet */ + memcpy(skb->cb, status, sizeof(*status)); + skb->pkt_type = IEEE80211_RX_MSG; + skb_queue_tail(&local->skb_queue, skb); + tasklet_schedule(&local->tasklet); +} +EXPORT_SYMBOL(ieee80211_rx_irqsafe); diff -puN net/mac80211/sta_info.c~git-net net/mac80211/sta_info.c --- a/net/mac80211/sta_info.c~git-net +++ a/net/mac80211/sta_info.c @@ -19,7 +19,6 @@ #include "ieee80211_i.h" #include "ieee80211_rate.h" #include "sta_info.h" -#include "debugfs_key.h" #include "debugfs_sta.h" /* Caller must hold local->sta_lock */ @@ -32,38 +31,34 @@ static void sta_info_hash_add(struct iee /* Caller must hold local->sta_lock */ -static void sta_info_hash_del(struct ieee80211_local *local, - struct sta_info *sta) +static int sta_info_hash_del(struct ieee80211_local *local, + struct sta_info *sta) { struct sta_info *s; s = local->sta_hash[STA_HASH(sta->addr)]; if (!s) - return; - if (memcmp(s->addr, sta->addr, ETH_ALEN) == 0) { + return -ENOENT; + if (s == sta) { local->sta_hash[STA_HASH(sta->addr)] = s->hnext; - return; + return 0; } - while (s->hnext && memcmp(s->hnext->addr, sta->addr, ETH_ALEN) != 0) + while (s->hnext && s->hnext != sta) s = s->hnext; - if (s->hnext) - s->hnext = s->hnext->hnext; - else - printk(KERN_ERR "%s: could not remove STA " MAC_FMT " from " - "hash table\n", local->mdev->name, MAC_ARG(sta->addr)); -} + if (s->hnext) { + s->hnext = sta->hnext; + return 0; + } -static inline void __sta_info_get(struct sta_info *sta) -{ - kref_get(&sta->kref); + return -ENOENT; } struct sta_info *sta_info_get(struct ieee80211_local *local, u8 *addr) { struct sta_info *sta; - spin_lock_bh(&local->sta_lock); + 
read_lock_bh(&local->sta_lock); sta = local->sta_hash[STA_HASH(addr)]; while (sta) { if (memcmp(sta->addr, addr, ETH_ALEN) == 0) { @@ -72,7 +67,7 @@ struct sta_info *sta_info_get(struct iee } sta = sta->hnext; } - spin_unlock_bh(&local->sta_lock); + read_unlock_bh(&local->sta_lock); return sta; } @@ -85,7 +80,7 @@ int sta_info_min_txrate_get(struct ieee8 int min_txrate = 9999999; int i; - spin_lock_bh(&local->sta_lock); + read_lock_bh(&local->sta_lock); mode = local->oper_hw_mode; for (i = 0; i < STA_HASH_SIZE; i++) { sta = local->sta_hash[i]; @@ -95,7 +90,7 @@ int sta_info_min_txrate_get(struct ieee8 sta = sta->hnext; } } - spin_unlock_bh(&local->sta_lock); + read_unlock_bh(&local->sta_lock); if (min_txrate == 9999999) min_txrate = 0; @@ -122,8 +117,6 @@ static void sta_info_release(struct kref } rate_control_free_sta(sta->rate_ctrl, sta->rate_ctrl_priv); rate_control_put(sta->rate_ctrl); - if (sta->key) - ieee80211_debugfs_key_sta_del(sta->key, sta); kfree(sta); } @@ -150,7 +143,6 @@ struct sta_info * sta_info_add(struct ie sta->rate_ctrl_priv = rate_control_alloc_sta(sta->rate_ctrl, gfp); if (!sta->rate_ctrl_priv) { rate_control_put(sta->rate_ctrl); - kref_put(&sta->kref, sta_info_release); kfree(sta); return NULL; } @@ -162,15 +154,14 @@ struct sta_info * sta_info_add(struct ie skb_queue_head_init(&sta->tx_filtered); __sta_info_get(sta); /* sta used by caller, decremented by * sta_info_put() */ - spin_lock_bh(&local->sta_lock); + write_lock_bh(&local->sta_lock); list_add(&sta->list, &local->sta_list); local->num_sta++; sta_info_hash_add(local, sta); - spin_unlock_bh(&local->sta_lock); if (local->ops->sta_table_notification) local->ops->sta_table_notification(local_to_hw(local), local->num_sta); - sta->key_idx_compression = HW_KEY_IDX_INVALID; + write_unlock_bh(&local->sta_lock); #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: Added STA " MAC_FMT "\n", @@ -178,47 +169,25 @@ struct sta_info * sta_info_add(struct ie #endif /* 
CONFIG_MAC80211_VERBOSE_DEBUG */ #ifdef CONFIG_MAC80211_DEBUGFS - if (!in_interrupt()) { - sta->debugfs_registered = 1; - ieee80211_sta_debugfs_add(sta); - rate_control_add_sta_debugfs(sta); - } else { - /* debugfs entry adding might sleep, so schedule process - * context task for adding entry for STAs that do not yet - * have one. */ - queue_work(local->hw.workqueue, &local->sta_debugfs_add); - } + /* debugfs entry adding might sleep, so schedule process + * context task for adding entry for STAs that do not yet + * have one. */ + queue_work(local->hw.workqueue, &local->sta_debugfs_add); #endif return sta; } -static void finish_sta_info_free(struct ieee80211_local *local, - struct sta_info *sta) -{ -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: Removed STA " MAC_FMT "\n", - local->mdev->name, MAC_ARG(sta->addr)); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - - if (sta->key) { - ieee80211_debugfs_key_remove(sta->key); - ieee80211_key_free(sta->key); - sta->key = NULL; - } - - rate_control_remove_sta_debugfs(sta); - ieee80211_sta_debugfs_remove(sta); - - sta_info_put(sta); -} - -static void sta_info_remove(struct sta_info *sta) +/* Caller must hold local->sta_lock */ +void sta_info_remove(struct sta_info *sta) { struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata; - sta_info_hash_del(local, sta); + /* don't do anything if we've been removed already */ + if (sta_info_hash_del(local, sta)) + return; + list_del(&sta->list); sdata = IEEE80211_DEV_TO_SUB_IF(sta->dev); if (sta->flags & WLAN_STA_PS) { @@ -228,61 +197,43 @@ static void sta_info_remove(struct sta_i } local->num_sta--; sta_info_remove_aid_ptr(sta); + + if (local->ops->sta_table_notification) + local->ops->sta_table_notification(local_to_hw(local), + local->num_sta); } -void sta_info_free(struct sta_info *sta, int locked) +void sta_info_free(struct sta_info *sta) { struct sk_buff *skb; struct ieee80211_local *local = sta->local; - if (!locked) { - 
spin_lock_bh(&local->sta_lock); - sta_info_remove(sta); - spin_unlock_bh(&local->sta_lock); - } else { - sta_info_remove(sta); - } - if (local->ops->sta_table_notification) - local->ops->sta_table_notification(local_to_hw(local), - local->num_sta); + might_sleep(); + + write_lock_bh(&local->sta_lock); + sta_info_remove(sta); + write_unlock_bh(&local->sta_lock); while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) { local->total_ps_buffered--; - dev_kfree_skb_any(skb); + dev_kfree_skb(skb); } while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) { - dev_kfree_skb_any(skb); + dev_kfree_skb(skb); } - if (sta->key) { - if (local->ops->set_key) { - struct ieee80211_key_conf *key; - key = ieee80211_key_data2conf(local, sta->key); - if (key) { - local->ops->set_key(local_to_hw(local), - DISABLE_KEY, - sta->addr, key, sta->aid); - kfree(key); - } - } - } else if (sta->key_idx_compression != HW_KEY_IDX_INVALID) { - struct ieee80211_key_conf conf; - memset(&conf, 0, sizeof(conf)); - conf.hw_key_idx = sta->key_idx_compression; - conf.alg = ALG_NULL; - conf.flags |= IEEE80211_KEY_FORCE_SW_ENCRYPT; - local->ops->set_key(local_to_hw(local), DISABLE_KEY, - sta->addr, &conf, sta->aid); - sta->key_idx_compression = HW_KEY_IDX_INVALID; - } +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: Removed STA " MAC_FMT "\n", + local->mdev->name, MAC_ARG(sta->addr)); +#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ -#ifdef CONFIG_MAC80211_DEBUGFS - if (in_atomic()) { - list_add(&sta->list, &local->deleted_sta_list); - queue_work(local->hw.workqueue, &local->sta_debugfs_add); - } else -#endif - finish_sta_info_free(local, sta); + ieee80211_key_free(sta->key); + sta->key = NULL; + + rate_control_remove_sta_debugfs(sta); + ieee80211_sta_debugfs_remove(sta); + + sta_info_put(sta); } @@ -343,13 +294,13 @@ static void sta_info_cleanup(unsigned lo struct ieee80211_local *local = (struct ieee80211_local *) data; struct sta_info *sta; - spin_lock_bh(&local->sta_lock); + 
read_lock_bh(&local->sta_lock); list_for_each_entry(sta, &local->sta_list, list) { __sta_info_get(sta); sta_info_cleanup_expire_buffered(local, sta); sta_info_put(sta); } - spin_unlock_bh(&local->sta_lock); + read_unlock_bh(&local->sta_lock); local->sta_cleanup.expires = jiffies + STA_INFO_CLEANUP_INTERVAL; add_timer(&local->sta_cleanup); @@ -363,35 +314,20 @@ static void sta_info_debugfs_add_task(st struct sta_info *sta, *tmp; while (1) { - spin_lock_bh(&local->sta_lock); - if (!list_empty(&local->deleted_sta_list)) { - sta = list_entry(local->deleted_sta_list.next, - struct sta_info, list); - list_del(local->deleted_sta_list.next); - } else - sta = NULL; - spin_unlock_bh(&local->sta_lock); - if (!sta) - break; - finish_sta_info_free(local, sta); - } - - while (1) { sta = NULL; - spin_lock_bh(&local->sta_lock); + read_lock_bh(&local->sta_lock); list_for_each_entry(tmp, &local->sta_list, list) { - if (!tmp->debugfs_registered) { + if (!tmp->debugfs.dir) { sta = tmp; __sta_info_get(sta); break; } } - spin_unlock_bh(&local->sta_lock); + read_unlock_bh(&local->sta_lock); if (!sta) break; - sta->debugfs_registered = 1; ieee80211_sta_debugfs_add(sta); rate_control_add_sta_debugfs(sta); sta_info_put(sta); @@ -401,9 +337,8 @@ static void sta_info_debugfs_add_task(st void sta_info_init(struct ieee80211_local *local) { - spin_lock_init(&local->sta_lock); + rwlock_init(&local->sta_lock); INIT_LIST_HEAD(&local->sta_list); - INIT_LIST_HEAD(&local->deleted_sta_list); init_timer(&local->sta_cleanup); local->sta_cleanup.expires = jiffies + STA_INFO_CLEANUP_INTERVAL; @@ -423,17 +358,8 @@ int sta_info_start(struct ieee80211_loca void sta_info_stop(struct ieee80211_local *local) { - struct sta_info *sta, *tmp; - del_timer(&local->sta_cleanup); - - list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { - /* sta_info_free must be called with 0 as the last - * parameter to ensure all debugfs sta entries are - * unregistered. We don't need locking at this - * point. 
*/ - sta_info_free(sta, 0); - } + sta_info_flush(local, NULL); } void sta_info_remove_aid_ptr(struct sta_info *sta) @@ -461,10 +387,19 @@ void sta_info_remove_aid_ptr(struct sta_ void sta_info_flush(struct ieee80211_local *local, struct net_device *dev) { struct sta_info *sta, *tmp; + LIST_HEAD(tmp_list); - spin_lock_bh(&local->sta_lock); + write_lock_bh(&local->sta_lock); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) - if (!dev || dev == sta->dev) - sta_info_free(sta, 1); - spin_unlock_bh(&local->sta_lock); + if (!dev || dev == sta->dev) { + __sta_info_get(sta); + sta_info_remove(sta); + list_add_tail(&sta->list, &tmp_list); + } + write_unlock_bh(&local->sta_lock); + + list_for_each_entry_safe(sta, tmp, &tmp_list, list) { + sta_info_free(sta); + sta_info_put(sta); + } } diff -puN net/mac80211/sta_info.h~git-net net/mac80211/sta_info.h --- a/net/mac80211/sta_info.h~git-net +++ a/net/mac80211/sta_info.h @@ -26,6 +26,8 @@ * send and receive non-IEEE 802.1X frames */ #define WLAN_STA_SHORT_PREAMBLE BIT(7) +/* whether this is an AP that we are associated with as a client */ +#define WLAN_STA_ASSOC_AP BIT(8) #define WLAN_STA_WME BIT(9) #define WLAN_STA_WDS BIT(27) @@ -90,20 +92,6 @@ struct sta_info { int channel_use; int channel_use_raw; - u8 antenna_sel_tx; - u8 antenna_sel_rx; - - - int key_idx_compression; /* key table index for compression and TX - * filtering; used only if sta->key is not - * set */ - -#ifdef CONFIG_MAC80211_DEBUGFS - int debugfs_registered; -#endif - int assoc_ap; /* whether this is an AP that we are - * associated with as a client */ - #ifdef CONFIG_MAC80211_DEBUG_COUNTERS unsigned int wme_rx_queue[NUM_RX_DATA_QUEUES]; unsigned int wme_tx_queue[NUM_RX_DATA_QUEUES]; @@ -149,12 +137,18 @@ struct sta_info { */ #define STA_INFO_CLEANUP_INTERVAL (10 * HZ) +static inline void __sta_info_get(struct sta_info *sta) +{ + kref_get(&sta->kref); +} + struct sta_info * sta_info_get(struct ieee80211_local *local, u8 *addr); int 
sta_info_min_txrate_get(struct ieee80211_local *local); void sta_info_put(struct sta_info *sta); struct sta_info * sta_info_add(struct ieee80211_local *local, struct net_device *dev, u8 *addr, gfp_t gfp); -void sta_info_free(struct sta_info *sta, int locked); +void sta_info_remove(struct sta_info *sta); +void sta_info_free(struct sta_info *sta); void sta_info_init(struct ieee80211_local *local); int sta_info_start(struct ieee80211_local *local); void sta_info_stop(struct ieee80211_local *local); diff -puN net/mac80211/tkip.c~git-net net/mac80211/tkip.c --- a/net/mac80211/tkip.c~git-net +++ a/net/mac80211/tkip.c @@ -182,7 +182,7 @@ u8 * ieee80211_tkip_add_iv(u8 *pos, stru *pos++ = iv0; *pos++ = iv1; *pos++ = iv2; - *pos++ = (key->keyidx << 6) | (1 << 5) /* Ext IV */; + *pos++ = (key->conf.keyidx << 6) | (1 << 5) /* Ext IV */; *pos++ = key->u.tkip.iv32 & 0xff; *pos++ = (key->u.tkip.iv32 >> 8) & 0xff; *pos++ = (key->u.tkip.iv32 >> 16) & 0xff; @@ -194,7 +194,7 @@ u8 * ieee80211_tkip_add_iv(u8 *pos, stru void ieee80211_tkip_gen_phase1key(struct ieee80211_key *key, u8 *ta, u16 *phase1key) { - tkip_mixing_phase1(ta, &key->key[ALG_TKIP_TEMP_ENCR_KEY], + tkip_mixing_phase1(ta, &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], key->u.tkip.iv32, phase1key); } @@ -204,12 +204,13 @@ void ieee80211_tkip_gen_rc4key(struct ie /* Calculate per-packet key */ if (key->u.tkip.iv16 == 0 || !key->u.tkip.tx_initialized) { /* IV16 wrapped around - perform TKIP phase 1 */ - tkip_mixing_phase1(ta, &key->key[ALG_TKIP_TEMP_ENCR_KEY], + tkip_mixing_phase1(ta, &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], key->u.tkip.iv32, key->u.tkip.p1k); key->u.tkip.tx_initialized = 1; } - tkip_mixing_phase2(key->u.tkip.p1k, &key->key[ALG_TKIP_TEMP_ENCR_KEY], + tkip_mixing_phase2(key->u.tkip.p1k, + &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], key->u.tkip.iv16, rc4key); } @@ -266,7 +267,7 @@ int ieee80211_tkip_decrypt_data(struct c if (!(keyid & (1 << 5))) return TKIP_DECRYPT_NO_EXT_IV; - if ((keyid >> 6) != key->keyidx) + if 
((keyid >> 6) != key->conf.keyidx) return TKIP_DECRYPT_INVALID_KEYIDX; if (key->u.tkip.rx_initialized[queue] && @@ -293,7 +294,7 @@ int ieee80211_tkip_decrypt_data(struct c key->u.tkip.iv32_rx[queue] != iv32) { key->u.tkip.rx_initialized[queue] = 1; /* IV16 wrapped around - perform TKIP phase 1 */ - tkip_mixing_phase1(ta, &key->key[ALG_TKIP_TEMP_ENCR_KEY], + tkip_mixing_phase1(ta, &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], iv32, key->u.tkip.p1k_rx[queue]); #ifdef CONFIG_TKIP_DEBUG { @@ -302,7 +303,8 @@ int ieee80211_tkip_decrypt_data(struct c " TK=", MAC_ARG(ta)); for (i = 0; i < 16; i++) printk("%02x ", - key->key[ALG_TKIP_TEMP_ENCR_KEY + i]); + key->conf.key[ + ALG_TKIP_TEMP_ENCR_KEY + i]); printk("\n"); printk(KERN_DEBUG "TKIP decrypt: P1K="); for (i = 0; i < 5; i++) @@ -313,7 +315,7 @@ int ieee80211_tkip_decrypt_data(struct c } tkip_mixing_phase2(key->u.tkip.p1k_rx[queue], - &key->key[ALG_TKIP_TEMP_ENCR_KEY], + &key->conf.key[ALG_TKIP_TEMP_ENCR_KEY], iv16, rc4key); #ifdef CONFIG_TKIP_DEBUG { diff -puN /dev/null net/mac80211/tx.c --- /dev/null +++ a/net/mac80211/tx.c @@ -0,0 +1,1896 @@ +/* + * Copyright 2002-2005, Instant802 Networks, Inc. + * Copyright 2005-2006, Devicescape Software, Inc. + * Copyright 2006-2007 Jiri Benc + * Copyright 2007 Johannes Berg + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * + * Transmit and frame generation functions. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ieee80211_i.h" +#include "ieee80211_led.h" +#include "wep.h" +#include "wpa.h" +#include "wme.h" +#include "ieee80211_rate.h" + +#define IEEE80211_TX_OK 0 +#define IEEE80211_TX_AGAIN 1 +#define IEEE80211_TX_FRAG_AGAIN 2 + +/* misc utils */ + +static inline void ieee80211_include_sequence(struct ieee80211_sub_if_data *sdata, + struct ieee80211_hdr *hdr) +{ + /* Set the sequence number for this frame. */ + hdr->seq_ctrl = cpu_to_le16(sdata->sequence); + + /* Increase the sequence number. */ + sdata->sequence = (sdata->sequence + 0x10) & IEEE80211_SCTL_SEQ; +} + +#ifdef CONFIG_MAC80211_LOWTX_FRAME_DUMP +static void ieee80211_dump_frame(const char *ifname, const char *title, + const struct sk_buff *skb) +{ + const struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + u16 fc; + int hdrlen; + + printk(KERN_DEBUG "%s: %s (len=%d)", ifname, title, skb->len); + if (skb->len < 4) { + printk("\n"); + return; + } + + fc = le16_to_cpu(hdr->frame_control); + hdrlen = ieee80211_get_hdrlen(fc); + if (hdrlen > skb->len) + hdrlen = skb->len; + if (hdrlen >= 4) + printk(" FC=0x%04x DUR=0x%04x", + fc, le16_to_cpu(hdr->duration_id)); + if (hdrlen >= 10) + printk(" A1=" MAC_FMT, MAC_ARG(hdr->addr1)); + if (hdrlen >= 16) + printk(" A2=" MAC_FMT, MAC_ARG(hdr->addr2)); + if (hdrlen >= 24) + printk(" A3=" MAC_FMT, MAC_ARG(hdr->addr3)); + if (hdrlen >= 30) + printk(" A4=" MAC_FMT, MAC_ARG(hdr->addr4)); + printk("\n"); +} +#else /* CONFIG_MAC80211_LOWTX_FRAME_DUMP */ +static inline void ieee80211_dump_frame(const char *ifname, const char *title, + struct sk_buff *skb) +{ +} +#endif /* CONFIG_MAC80211_LOWTX_FRAME_DUMP */ + +static u16 ieee80211_duration(struct ieee80211_txrx_data *tx, int group_addr, + int next_frag_len) +{ + int rate, mrate, erp, dur, i; + struct ieee80211_rate *txrate = tx->u.tx.rate; + struct ieee80211_local *local = tx->local; + struct ieee80211_hw_mode 
*mode = tx->u.tx.mode; + + erp = txrate->flags & IEEE80211_RATE_ERP; + + /* + * data and mgmt (except PS Poll): + * - during CFP: 32768 + * - during contention period: + * if addr1 is group address: 0 + * if more fragments = 0 and addr1 is individual address: time to + * transmit one ACK plus SIFS + * if more fragments = 1 and addr1 is individual address: time to + * transmit next fragment plus 2 x ACK plus 3 x SIFS + * + * IEEE 802.11, 9.6: + * - control response frame (CTS or ACK) shall be transmitted using the + * same rate as the immediately previous frame in the frame exchange + * sequence, if this rate belongs to the PHY mandatory rates, or else + * at the highest possible rate belonging to the PHY rates in the + * BSSBasicRateSet + */ + + if ((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL) { + /* TODO: These control frames are not currently sent by + * 80211.o, but should they be implemented, this function + * needs to be updated to support duration field calculation. + * + * RTS: time needed to transmit pending data/mgmt frame plus + * one CTS frame plus one ACK frame plus 3 x SIFS + * CTS: duration of immediately previous RTS minus time + * required to transmit CTS and its SIFS + * ACK: 0 if immediately previous directed data/mgmt had + * more=0, with more=1 duration in ACK frame is duration + * from previous frame minus time needed to transmit ACK + * and its SIFS + * PS Poll: BIT(15) | BIT(14) | aid + */ + return 0; + } + + /* data/mgmt */ + if (0 /* FIX: data/mgmt during CFP */) + return 32768; + + if (group_addr) /* Group address as the destination - no ACK */ + return 0; + + /* Individual destination address: + * IEEE 802.11, Ch. 9.6 (after IEEE 802.11g changes) + * CTS and ACK frames shall be transmitted using the highest rate in + * basic rate set that is less than or equal to the rate of the + * immediately previous frame and that is using the same modulation + * (CCK or OFDM). 
If no basic rate set matches with these requirements, + * the highest mandatory rate of the PHY that is less than or equal to + * the rate of the previous frame is used. + * Mandatory rates for IEEE 802.11g PHY: 1, 2, 5.5, 11, 6, 12, 24 Mbps + */ + rate = -1; + mrate = 10; /* use 1 Mbps if everything fails */ + for (i = 0; i < mode->num_rates; i++) { + struct ieee80211_rate *r = &mode->rates[i]; + if (r->rate > txrate->rate) + break; + + if (IEEE80211_RATE_MODULATION(txrate->flags) != + IEEE80211_RATE_MODULATION(r->flags)) + continue; + + if (r->flags & IEEE80211_RATE_BASIC) + rate = r->rate; + else if (r->flags & IEEE80211_RATE_MANDATORY) + mrate = r->rate; + } + if (rate == -1) { + /* No matching basic rate found; use highest suitable mandatory + * PHY rate */ + rate = mrate; + } + + /* Time needed to transmit ACK + * (10 bytes + 4-byte FCS = 112 bits) plus SIFS; rounded up + * to closest integer */ + + dur = ieee80211_frame_duration(local, 10, rate, erp, + tx->sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE); + + if (next_frag_len) { + /* Frame is fragmented: duration increases with time needed to + * transmit next fragment plus ACK and 2 x SIFS. 
*/ + dur *= 2; /* ACK + SIFS */ + /* next fragment */ + dur += ieee80211_frame_duration(local, next_frag_len, + txrate->rate, erp, + tx->sdata->flags & + IEEE80211_SDATA_SHORT_PREAMBLE); + } + + return dur; +} + +static inline int __ieee80211_queue_stopped(const struct ieee80211_local *local, + int queue) +{ + return test_bit(IEEE80211_LINK_STATE_XOFF, &local->state[queue]); +} + +static inline int __ieee80211_queue_pending(const struct ieee80211_local *local, + int queue) +{ + return test_bit(IEEE80211_LINK_STATE_PENDING, &local->state[queue]); +} + +static int inline is_ieee80211_device(struct net_device *dev, + struct net_device *master) +{ + return (wdev_priv(dev->ieee80211_ptr) == + wdev_priv(master->ieee80211_ptr)); +} + +/* tx handlers */ + +static ieee80211_txrx_result +ieee80211_tx_h_check_assoc(struct ieee80211_txrx_data *tx) +{ +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + struct sk_buff *skb = tx->skb; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; +#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + u32 sta_flags; + + if (unlikely(tx->local->sta_scanning != 0) && + ((tx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || + (tx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PROBE_REQ)) + return TXRX_DROP; + + if (tx->flags & IEEE80211_TXRXD_TXPS_BUFFERED) + return TXRX_CONTINUE; + + sta_flags = tx->sta ? 
tx->sta->flags : 0; + + if (likely(tx->flags & IEEE80211_TXRXD_TXUNICAST)) { + if (unlikely(!(sta_flags & WLAN_STA_ASSOC) && + tx->sdata->type != IEEE80211_IF_TYPE_IBSS && + (tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA)) { +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: dropped data frame to not " + "associated station " MAC_FMT "\n", + tx->dev->name, MAC_ARG(hdr->addr1)); +#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + I802_DEBUG_INC(tx->local->tx_handlers_drop_not_assoc); + return TXRX_DROP; + } + } else { + if (unlikely((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && + tx->local->num_sta == 0 && + tx->sdata->type != IEEE80211_IF_TYPE_IBSS)) { + /* + * No associated STAs - no need to send multicast + * frames. + */ + return TXRX_DROP; + } + return TXRX_CONTINUE; + } + + if (unlikely(!tx->u.tx.mgmt_interface && tx->sdata->ieee802_1x && + !(sta_flags & WLAN_STA_AUTHORIZED))) { +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: dropped frame to " MAC_FMT + " (unauthorized port)\n", tx->dev->name, + MAC_ARG(hdr->addr1)); +#endif + I802_DEBUG_INC(tx->local->tx_handlers_drop_unauth_port); + return TXRX_DROP; + } + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_tx_h_sequence(struct ieee80211_txrx_data *tx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; + + if (ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control)) >= 24) + ieee80211_include_sequence(tx->sdata, hdr); + + return TXRX_CONTINUE; +} + +/* This function is called whenever the AP is about to exceed the maximum limit + * of buffered frames for power saving STAs. This situation should not really + * happen often during normal operation, so dropping the oldest buffered packet + * from each queue should be OK to make some room for new frames. 
*/ +static void purge_old_ps_buffers(struct ieee80211_local *local) +{ + int total = 0, purged = 0; + struct sk_buff *skb; + struct ieee80211_sub_if_data *sdata; + struct sta_info *sta; + + read_lock(&local->sub_if_lock); + list_for_each_entry(sdata, &local->sub_if_list, list) { + struct ieee80211_if_ap *ap; + if (sdata->dev == local->mdev || + sdata->type != IEEE80211_IF_TYPE_AP) + continue; + ap = &sdata->u.ap; + skb = skb_dequeue(&ap->ps_bc_buf); + if (skb) { + purged++; + dev_kfree_skb(skb); + } + total += skb_queue_len(&ap->ps_bc_buf); + } + read_unlock(&local->sub_if_lock); + + read_lock_bh(&local->sta_lock); + list_for_each_entry(sta, &local->sta_list, list) { + skb = skb_dequeue(&sta->ps_tx_buf); + if (skb) { + purged++; + dev_kfree_skb(skb); + } + total += skb_queue_len(&sta->ps_tx_buf); + } + read_unlock_bh(&local->sta_lock); + + local->total_ps_buffered = total; + printk(KERN_DEBUG "%s: PS buffers full - purged %d frames\n", + local->mdev->name, purged); +} + +static inline ieee80211_txrx_result +ieee80211_tx_h_multicast_ps_buf(struct ieee80211_txrx_data *tx) +{ + /* broadcast/multicast frame */ + /* If any of the associated stations is in power save mode, + * the frame is buffered to be sent after DTIM beacon frame */ + if ((tx->local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING) && + tx->sdata->type != IEEE80211_IF_TYPE_WDS && + tx->sdata->bss && atomic_read(&tx->sdata->bss->num_sta_ps) && + !(tx->fc & IEEE80211_FCTL_ORDER)) { + if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) + purge_old_ps_buffers(tx->local); + if (skb_queue_len(&tx->sdata->bss->ps_bc_buf) >= + AP_MAX_BC_BUFFER) { + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: BC TX buffer full - " + "dropping the oldest frame\n", + tx->dev->name); + } + dev_kfree_skb(skb_dequeue(&tx->sdata->bss->ps_bc_buf)); + } else + tx->local->total_ps_buffered++; + skb_queue_tail(&tx->sdata->bss->ps_bc_buf, tx->skb); + return TXRX_QUEUED; + } + + return TXRX_CONTINUE; +} + +static inline 
ieee80211_txrx_result +ieee80211_tx_h_unicast_ps_buf(struct ieee80211_txrx_data *tx) +{ + struct sta_info *sta = tx->sta; + + if (unlikely(!sta || + ((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT && + (tx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PROBE_RESP))) + return TXRX_CONTINUE; + + if (unlikely((sta->flags & WLAN_STA_PS) && !sta->pspoll)) { + struct ieee80211_tx_packet_data *pkt_data; +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + printk(KERN_DEBUG "STA " MAC_FMT " aid %d: PS buffer (entries " + "before %d)\n", + MAC_ARG(sta->addr), sta->aid, + skb_queue_len(&sta->ps_tx_buf)); +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + sta->flags |= WLAN_STA_TIM; + if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) + purge_old_ps_buffers(tx->local); + if (skb_queue_len(&sta->ps_tx_buf) >= STA_MAX_TX_BUFFER) { + struct sk_buff *old = skb_dequeue(&sta->ps_tx_buf); + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: STA " MAC_FMT " TX " + "buffer full - dropping oldest frame\n", + tx->dev->name, MAC_ARG(sta->addr)); + } + dev_kfree_skb(old); + } else + tx->local->total_ps_buffered++; + /* Queue frame to be sent after STA sends an PS Poll frame */ + if (skb_queue_empty(&sta->ps_tx_buf)) { + if (tx->local->ops->set_tim) + tx->local->ops->set_tim(local_to_hw(tx->local), + sta->aid, 1); + if (tx->sdata->bss) + bss_tim_set(tx->local, tx->sdata->bss, sta->aid); + } + pkt_data = (struct ieee80211_tx_packet_data *)tx->skb->cb; + pkt_data->jiffies = jiffies; + skb_queue_tail(&sta->ps_tx_buf, tx->skb); + return TXRX_QUEUED; + } +#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG + else if (unlikely(sta->flags & WLAN_STA_PS)) { + printk(KERN_DEBUG "%s: STA " MAC_FMT " in PS mode, but pspoll " + "set -> send frame\n", tx->dev->name, + MAC_ARG(sta->addr)); + } +#endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ + sta->pspoll = 0; + + return TXRX_CONTINUE; +} + + +static ieee80211_txrx_result +ieee80211_tx_h_ps_buf(struct ieee80211_txrx_data *tx) +{ + if (unlikely(tx->flags & 
IEEE80211_TXRXD_TXPS_BUFFERED)) + return TXRX_CONTINUE; + + if (tx->flags & IEEE80211_TXRXD_TXUNICAST) + return ieee80211_tx_h_unicast_ps_buf(tx); + else + return ieee80211_tx_h_multicast_ps_buf(tx); +} + + + + +static ieee80211_txrx_result +ieee80211_tx_h_select_key(struct ieee80211_txrx_data *tx) +{ + tx->u.tx.control->key_idx = HW_KEY_IDX_INVALID; + + if (unlikely(tx->u.tx.control->flags & IEEE80211_TXCTL_DO_NOT_ENCRYPT)) + tx->key = NULL; + else if (tx->sta && tx->sta->key) + tx->key = tx->sta->key; + else if (tx->sdata->default_key) + tx->key = tx->sdata->default_key; + else if (tx->sdata->drop_unencrypted && + !(tx->sdata->eapol && ieee80211_is_eapol(tx->skb))) { + I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted); + return TXRX_DROP; + } else + tx->key = NULL; + + if (tx->key) { + tx->key->tx_rx_count++; + if (unlikely(tx->local->key_tx_rx_threshold && + tx->key->tx_rx_count > + tx->local->key_tx_rx_threshold)) { + ieee80211_key_threshold_notify(tx->dev, tx->key, + tx->sta); + } + } + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_tx_h_fragment(struct ieee80211_txrx_data *tx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; + size_t hdrlen, per_fragm, num_fragm, payload_len, left; + struct sk_buff **frags, *first, *frag; + int i; + u16 seq; + u8 *pos; + int frag_threshold = tx->local->fragmentation_threshold; + + if (!(tx->flags & IEEE80211_TXRXD_FRAGMENTED)) + return TXRX_CONTINUE; + + first = tx->skb; + + hdrlen = ieee80211_get_hdrlen(tx->fc); + payload_len = first->len - hdrlen; + per_fragm = frag_threshold - hdrlen - FCS_LEN; + num_fragm = DIV_ROUND_UP(payload_len, per_fragm); + + frags = kzalloc(num_fragm * sizeof(struct sk_buff *), GFP_ATOMIC); + if (!frags) + goto fail; + + hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREFRAGS); + seq = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_SEQ; + pos = first->data + hdrlen + per_fragm; + left = payload_len - per_fragm; + for (i = 0; i < num_fragm - 1; 
i++) { + struct ieee80211_hdr *fhdr; + size_t copylen; + + if (left <= 0) + goto fail; + + /* reserve enough extra head and tail room for possible + * encryption */ + frag = frags[i] = + dev_alloc_skb(tx->local->tx_headroom + + frag_threshold + + IEEE80211_ENCRYPT_HEADROOM + + IEEE80211_ENCRYPT_TAILROOM); + if (!frag) + goto fail; + /* Make sure that all fragments use the same priority so + * that they end up using the same TX queue */ + frag->priority = first->priority; + skb_reserve(frag, tx->local->tx_headroom + + IEEE80211_ENCRYPT_HEADROOM); + fhdr = (struct ieee80211_hdr *) skb_put(frag, hdrlen); + memcpy(fhdr, first->data, hdrlen); + if (i == num_fragm - 2) + fhdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREFRAGS); + fhdr->seq_ctrl = cpu_to_le16(seq | ((i + 1) & IEEE80211_SCTL_FRAG)); + copylen = left > per_fragm ? per_fragm : left; + memcpy(skb_put(frag, copylen), pos, copylen); + + pos += copylen; + left -= copylen; + } + skb_trim(first, hdrlen + per_fragm); + + tx->u.tx.num_extra_frag = num_fragm - 1; + tx->u.tx.extra_frag = frags; + + return TXRX_CONTINUE; + + fail: + printk(KERN_DEBUG "%s: failed to fragment frame\n", tx->dev->name); + if (frags) { + for (i = 0; i < num_fragm - 1; i++) + if (frags[i]) + dev_kfree_skb(frags[i]); + kfree(frags); + } + I802_DEBUG_INC(tx->local->tx_handlers_drop_fragment); + return TXRX_DROP; +} + +static int wep_encrypt_skb(struct ieee80211_txrx_data *tx, struct sk_buff *skb) +{ + if (!(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) { + if (ieee80211_wep_encrypt(tx->local, skb, tx->key)) + return -1; + } else { + tx->u.tx.control->key_idx = tx->key->conf.hw_key_idx; + if (tx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) { + if (ieee80211_wep_add_iv(tx->local, skb, tx->key) == + NULL) + return -1; + } + } + return 0; +} + +static ieee80211_txrx_result +ieee80211_tx_h_wep_encrypt(struct ieee80211_txrx_data *tx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; + u16 fc; + + fc = 
le16_to_cpu(hdr->frame_control); + + if (!tx->key || tx->key->conf.alg != ALG_WEP || + ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA && + ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || + (fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH))) + return TXRX_CONTINUE; + + tx->u.tx.control->iv_len = WEP_IV_LEN; + tx->u.tx.control->icv_len = WEP_ICV_LEN; + ieee80211_tx_set_iswep(tx); + + if (wep_encrypt_skb(tx, tx->skb) < 0) { + I802_DEBUG_INC(tx->local->tx_handlers_drop_wep); + return TXRX_DROP; + } + + if (tx->u.tx.extra_frag) { + int i; + for (i = 0; i < tx->u.tx.num_extra_frag; i++) { + if (wep_encrypt_skb(tx, tx->u.tx.extra_frag[i]) < 0) { + I802_DEBUG_INC(tx->local-> + tx_handlers_drop_wep); + return TXRX_DROP; + } + } + } + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_tx_h_rate_ctrl(struct ieee80211_txrx_data *tx) +{ + struct rate_control_extra extra; + + memset(&extra, 0, sizeof(extra)); + extra.mode = tx->u.tx.mode; + extra.mgmt_data = tx->sdata && + tx->sdata->type == IEEE80211_IF_TYPE_MGMT; + extra.ethertype = tx->ethertype; + + tx->u.tx.rate = rate_control_get_rate(tx->local, tx->dev, tx->skb, + &extra); + if (unlikely(extra.probe != NULL)) { + tx->u.tx.control->flags |= IEEE80211_TXCTL_RATE_CTRL_PROBE; + tx->flags |= IEEE80211_TXRXD_TXPROBE_LAST_FRAG; + tx->u.tx.control->alt_retry_rate = tx->u.tx.rate->val; + tx->u.tx.rate = extra.probe; + } else { + tx->u.tx.control->alt_retry_rate = -1; + } + if (!tx->u.tx.rate) + return TXRX_DROP; + if (tx->u.tx.mode->mode == MODE_IEEE80211G && + (tx->sdata->flags & IEEE80211_SDATA_USE_PROTECTION) && + (tx->flags & IEEE80211_TXRXD_FRAGMENTED) && extra.nonerp) { + tx->u.tx.last_frag_rate = tx->u.tx.rate; + if (extra.probe) + tx->flags &= ~IEEE80211_TXRXD_TXPROBE_LAST_FRAG; + else + tx->flags |= IEEE80211_TXRXD_TXPROBE_LAST_FRAG; + tx->u.tx.rate = extra.nonerp; + tx->u.tx.control->rate = extra.nonerp; + tx->u.tx.control->flags &= ~IEEE80211_TXCTL_RATE_CTRL_PROBE; + } else { + 
tx->u.tx.last_frag_rate = tx->u.tx.rate; + tx->u.tx.control->rate = tx->u.tx.rate; + } + tx->u.tx.control->tx_rate = tx->u.tx.rate->val; + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_tx_h_misc(struct ieee80211_txrx_data *tx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; + u16 fc = le16_to_cpu(hdr->frame_control); + u16 dur; + struct ieee80211_tx_control *control = tx->u.tx.control; + struct ieee80211_hw_mode *mode = tx->u.tx.mode; + + if (!is_multicast_ether_addr(hdr->addr1)) { + if (tx->skb->len + FCS_LEN > tx->local->rts_threshold && + tx->local->rts_threshold < IEEE80211_MAX_RTS_THRESHOLD) { + control->flags |= IEEE80211_TXCTL_USE_RTS_CTS; + control->flags |= IEEE80211_TXCTL_LONG_RETRY_LIMIT; + control->retry_limit = + tx->local->long_retry_limit; + } else { + control->retry_limit = + tx->local->short_retry_limit; + } + } else { + control->retry_limit = 1; + } + + if (tx->flags & IEEE80211_TXRXD_FRAGMENTED) { + /* Do not use multiple retry rates when sending fragmented + * frames. + * TODO: The last fragment could still use multiple retry + * rates. */ + control->alt_retry_rate = -1; + } + + /* Use CTS protection for unicast frames sent using extended rates if + * there are associated non-ERP stations and RTS/CTS is not configured + * for the frame. */ + if (mode->mode == MODE_IEEE80211G && + (tx->u.tx.rate->flags & IEEE80211_RATE_ERP) && + (tx->flags & IEEE80211_TXRXD_TXUNICAST) && + (tx->sdata->flags & IEEE80211_SDATA_USE_PROTECTION) && + !(control->flags & IEEE80211_TXCTL_USE_RTS_CTS)) + control->flags |= IEEE80211_TXCTL_USE_CTS_PROTECT; + + /* Transmit data frames using short preambles if the driver supports + * short preambles at the selected rate and short preambles are + * available on the network at the current point in time. 
*/ + if (((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) && + (tx->u.tx.rate->flags & IEEE80211_RATE_PREAMBLE2) && + (tx->sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE) && + (!tx->sta || (tx->sta->flags & WLAN_STA_SHORT_PREAMBLE))) { + tx->u.tx.control->tx_rate = tx->u.tx.rate->val2; + } + + /* Setup duration field for the first fragment of the frame. Duration + * for remaining fragments will be updated when they are being sent + * to low-level driver in ieee80211_tx(). */ + dur = ieee80211_duration(tx, is_multicast_ether_addr(hdr->addr1), + (tx->flags & IEEE80211_TXRXD_FRAGMENTED) ? + tx->u.tx.extra_frag[0]->len : 0); + hdr->duration_id = cpu_to_le16(dur); + + if ((control->flags & IEEE80211_TXCTL_USE_RTS_CTS) || + (control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT)) { + struct ieee80211_rate *rate; + + /* Do not use multiple retry rates when using RTS/CTS */ + control->alt_retry_rate = -1; + + /* Use min(data rate, max base rate) as CTS/RTS rate */ + rate = tx->u.tx.rate; + while (rate > mode->rates && + !(rate->flags & IEEE80211_RATE_BASIC)) + rate--; + + control->rts_cts_rate = rate->val; + control->rts_rate = rate; + } + + if (tx->sta) { + tx->sta->tx_packets++; + tx->sta->tx_fragments++; + tx->sta->tx_bytes += tx->skb->len; + if (tx->u.tx.extra_frag) { + int i; + tx->sta->tx_fragments += tx->u.tx.num_extra_frag; + for (i = 0; i < tx->u.tx.num_extra_frag; i++) { + tx->sta->tx_bytes += + tx->u.tx.extra_frag[i]->len; + } + } + } + + return TXRX_CONTINUE; +} + +static ieee80211_txrx_result +ieee80211_tx_h_load_stats(struct ieee80211_txrx_data *tx) +{ + struct ieee80211_local *local = tx->local; + struct ieee80211_hw_mode *mode = tx->u.tx.mode; + struct sk_buff *skb = tx->skb; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + u32 load = 0, hdrtime; + + /* TODO: this could be part of tx_status handling, so that the number + * of retries would be known; TX rate should in that case be stored + * somewhere with the packet */ + + /* Estimate 
total channel use caused by this frame */ + + /* 1 bit at 1 Mbit/s takes 1 usec; in channel_use values, + * 1 usec = 1/8 * (1080 / 10) = 13.5 */ + + if (mode->mode == MODE_IEEE80211A || + mode->mode == MODE_ATHEROS_TURBO || + mode->mode == MODE_ATHEROS_TURBOG || + (mode->mode == MODE_IEEE80211G && + tx->u.tx.rate->flags & IEEE80211_RATE_ERP)) + hdrtime = CHAN_UTIL_HDR_SHORT; + else + hdrtime = CHAN_UTIL_HDR_LONG; + + load = hdrtime; + if (!is_multicast_ether_addr(hdr->addr1)) + load += hdrtime; + + if (tx->u.tx.control->flags & IEEE80211_TXCTL_USE_RTS_CTS) + load += 2 * hdrtime; + else if (tx->u.tx.control->flags & IEEE80211_TXCTL_USE_CTS_PROTECT) + load += hdrtime; + + /* payload cost of the first fragment: length times inverse rate */ + load += skb->len * tx->u.tx.rate->rate_inv; + + if (tx->u.tx.extra_frag) { + int i; + for (i = 0; i < tx->u.tx.num_extra_frag; i++) { + load += 2 * hdrtime; + /* weight extra fragments by rate_inv, exactly like + * the first fragment above; using the raw rate here + * would make faster rates look more expensive */ + load += tx->u.tx.extra_frag[i]->len * + tx->u.tx.rate->rate_inv; + } + } + + /* Divide channel_use by 8 to avoid wrapping around the counter */ + load >>= CHAN_UTIL_SHIFT; + local->channel_use_raw += load; + if (tx->sta) + tx->sta->channel_use_raw += load; + tx->sdata->channel_use_raw += load; + + return TXRX_CONTINUE; +} + +/* TODO: implement register/unregister functions for adding TX/RX handlers + * into ordered list */ + +ieee80211_tx_handler ieee80211_tx_handlers[] = +{ + ieee80211_tx_h_check_assoc, + ieee80211_tx_h_sequence, + ieee80211_tx_h_ps_buf, + ieee80211_tx_h_select_key, + ieee80211_tx_h_michael_mic_add, + ieee80211_tx_h_fragment, + ieee80211_tx_h_tkip_encrypt, + ieee80211_tx_h_ccmp_encrypt, + ieee80211_tx_h_wep_encrypt, + ieee80211_tx_h_rate_ctrl, + ieee80211_tx_h_misc, + ieee80211_tx_h_load_stats, + NULL +}; + +/* actual transmit path */ + +/* + * deal with packet injection down monitor interface + * with Radiotap Header -- only called for monitor mode interface + */ +static ieee80211_txrx_result +__ieee80211_parse_tx_radiotap( + struct ieee80211_txrx_data *tx, + struct sk_buff *skb, struct ieee80211_tx_control *control) +{ + /* + *
this is the moment to interpret and discard the radiotap header that + * must be at the start of the packet injected in Monitor mode + * + * Need to take some care with endian-ness since radiotap + * args are little-endian + */ + + struct ieee80211_radiotap_iterator iterator; + struct ieee80211_radiotap_header *rthdr = + (struct ieee80211_radiotap_header *) skb->data; + struct ieee80211_hw_mode *mode = tx->local->hw.conf.mode; + int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len); + + /* + * default control situation for all injected packets + * FIXME: this does not suit all usage cases, expand to allow control + */ + + control->retry_limit = 1; /* no retry */ + control->key_idx = HW_KEY_IDX_INVALID; + control->flags &= ~(IEEE80211_TXCTL_USE_RTS_CTS | + IEEE80211_TXCTL_USE_CTS_PROTECT); + control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT | + IEEE80211_TXCTL_NO_ACK; + control->antenna_sel_tx = 0; /* default to default antenna */ + + /* + * for every radiotap entry that is present + * (ieee80211_radiotap_iterator_next returns -ENOENT when no more + * entries present, or -EINVAL on error) + */ + + while (!ret) { + int i, target_rate; + + ret = ieee80211_radiotap_iterator_next(&iterator); + + if (ret) + continue; + + /* see if this argument is something we can use */ + switch (iterator.this_arg_index) { + /* + * You must take care when dereferencing iterator.this_arg + * for multibyte types... the pointer is not aligned. Use + * get_unaligned((type *)iterator.this_arg) to dereference + * iterator.this_arg for type "type" safely on all arches. 
+ */ + case IEEE80211_RADIOTAP_RATE: + /* + * radiotap rate u8 is in 500kbps units eg, 0x02=1Mbps + * ieee80211 rate int is in 100kbps units eg, 0x0a=1Mbps + */ + target_rate = (*iterator.this_arg) * 5; + for (i = 0; i < mode->num_rates; i++) { + struct ieee80211_rate *r = &mode->rates[i]; + + if (r->rate > target_rate) + continue; + + control->rate = r; + + if (r->flags & IEEE80211_RATE_PREAMBLE2) + control->tx_rate = r->val2; + else + control->tx_rate = r->val; + + /* end on exact match */ + if (r->rate == target_rate) + i = mode->num_rates; + } + break; + + case IEEE80211_RADIOTAP_ANTENNA: + /* + * radiotap uses 0 for 1st ant, mac80211 is 1 for + * 1st ant + */ + control->antenna_sel_tx = (*iterator.this_arg) + 1; + break; + + case IEEE80211_RADIOTAP_DBM_TX_POWER: + control->power_level = *iterator.this_arg; + break; + + case IEEE80211_RADIOTAP_FLAGS: + if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FCS) { + /* + * this indicates that the skb we have been + * handed has the 32-bit FCS CRC at the end... 
+ * we should react to that by snipping it off + * because it will be recomputed and added + * on transmission + */ + if (skb->len < (iterator.max_length + FCS_LEN)) + return TXRX_DROP; + + skb_trim(skb, skb->len - FCS_LEN); + } + break; + + default: + break; + } + } + + if (ret != -ENOENT) /* ie, if we didn't simply run out of fields */ + return TXRX_DROP; + + /* + * remove the radiotap header + * iterator->max_length was sanity-checked against + * skb->len by iterator init + */ + skb_pull(skb, iterator.max_length); + + return TXRX_CONTINUE; +} + +/* + * Initialise *tx and the per-frame parts of *control for one outgoing skb. + * Returns TXRX_DROP when an injected frame cannot be parsed, TXRX_QUEUED + * to flag a frame injected through a monitor interface, and TXRX_CONTINUE + * for everything else. + */ +static ieee80211_txrx_result inline +__ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, + struct sk_buff *skb, + struct net_device *dev, + struct ieee80211_tx_control *control) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_sub_if_data *sdata; + ieee80211_txrx_result res = TXRX_CONTINUE; + + int hdrlen; + + memset(tx, 0, sizeof(*tx)); + tx->skb = skb; + tx->dev = dev; /* use original interface */ + tx->local = local; + tx->sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + /* + * set defaults for things that can be set by + * injected radiotap headers + */ + control->power_level = local->hw.conf.power_level; + control->antenna_sel_tx = local->hw.conf.antenna_sel_tx; + + /* process and remove the injection radiotap header */ + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (unlikely(sdata->type == IEEE80211_IF_TYPE_MNTR)) { + if (__ieee80211_parse_tx_radiotap(tx, skb, control) == + TXRX_DROP) { + return TXRX_DROP; + } + /* + * with the radiotap header gone, refuse frames too short + * to hold frame control, duration and addr1, which are + * read below (same 10-byte limit as in ieee80211_tx()) + */ + if (unlikely(skb->len < 10)) + return TXRX_DROP; + /* + * we removed the radiotap header after this point, + * we filled control with what we could use + * set to the actual ieee header now + */ + hdr = (struct ieee80211_hdr *) skb->data; + res = TXRX_QUEUED; /* indication it was monitor packet */ + } + + /* + * look up the station and frame control only now, so that for + * injected frames they are taken from the 802.11 header rather + * than from the radiotap bytes that used to sit at skb->data; + * this also means no station reference is held on the drop + * paths above + */ + tx->sta = sta_info_get(local, hdr->addr1); + tx->fc = le16_to_cpu(hdr->frame_control); + + tx->u.tx.control = control; + if
(is_multicast_ether_addr(hdr->addr1)) { + tx->flags &= ~IEEE80211_TXRXD_TXUNICAST; + control->flags |= IEEE80211_TXCTL_NO_ACK; + } else { + tx->flags |= IEEE80211_TXRXD_TXUNICAST; + control->flags &= ~IEEE80211_TXCTL_NO_ACK; + } + if (local->fragmentation_threshold < IEEE80211_MAX_FRAG_THRESHOLD && + (tx->flags & IEEE80211_TXRXD_TXUNICAST) && + skb->len + FCS_LEN > local->fragmentation_threshold && + !local->ops->set_frag_threshold) + tx->flags |= IEEE80211_TXRXD_FRAGMENTED; + else + tx->flags &= ~IEEE80211_TXRXD_FRAGMENTED; + if (!tx->sta) + control->flags |= IEEE80211_TXCTL_CLEAR_DST_MASK; + else if (tx->sta->clear_dst_mask) { + control->flags |= IEEE80211_TXCTL_CLEAR_DST_MASK; + tx->sta->clear_dst_mask = 0; + } + hdrlen = ieee80211_get_hdrlen(tx->fc); + if (skb->len > hdrlen + sizeof(rfc1042_header) + 2) { + u8 *pos = &skb->data[hdrlen + sizeof(rfc1042_header)]; + tx->ethertype = (pos[0] << 8) | pos[1]; + } + control->flags |= IEEE80211_TXCTL_FIRST_FRAGMENT; + + return res; +} + +/* Device in tx->dev has a reference added; use dev_put(tx->dev) when + * finished with it. 
*/ +static int inline ieee80211_tx_prepare(struct ieee80211_txrx_data *tx, + struct sk_buff *skb, + struct net_device *mdev, + struct ieee80211_tx_control *control) +{ + struct ieee80211_tx_packet_data *pkt_data; + struct net_device *dev; + + pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; + dev = dev_get_by_index(pkt_data->ifindex); + if (unlikely(dev && !is_ieee80211_device(dev, mdev))) { + dev_put(dev); + dev = NULL; + } + if (unlikely(!dev)) + return -ENODEV; + __ieee80211_tx_prepare(tx, skb, dev, control); + return 0; +} + +/* + * Hand the first frame (skb, may be NULL when it was already sent) and any + * remaining extra fragments to the low-level driver. + * Returns IEEE80211_TX_AGAIN if the first frame could not be sent, + * IEEE80211_TX_FRAG_AGAIN if a fragment could not be sent, and + * IEEE80211_TX_OK once everything was passed to the driver. + */ +static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb, + struct ieee80211_txrx_data *tx) +{ + struct ieee80211_tx_control *control = tx->u.tx.control; + int ret, i; + + if (!ieee80211_qdisc_installed(local->mdev) && + __ieee80211_queue_stopped(local, 0)) { + netif_stop_queue(local->mdev); + return IEEE80211_TX_AGAIN; + } + if (skb) { + ieee80211_dump_frame(local->mdev->name, "TX to low-level driver", skb); + ret = local->ops->tx(local_to_hw(local), skb, control); + if (ret) + return IEEE80211_TX_AGAIN; + local->mdev->trans_start = jiffies; + ieee80211_led_tx(local, 1); + } + if (tx->u.tx.extra_frag) { + control->flags &= ~(IEEE80211_TXCTL_USE_RTS_CTS | + IEEE80211_TXCTL_USE_CTS_PROTECT | + IEEE80211_TXCTL_CLEAR_DST_MASK | + IEEE80211_TXCTL_FIRST_FRAGMENT); + for (i = 0; i < tx->u.tx.num_extra_frag; i++) { + /* fragments already handed to the driver are NULL */ + if (!tx->u.tx.extra_frag[i]) + continue; + if (__ieee80211_queue_stopped(local, control->queue)) + return IEEE80211_TX_FRAG_AGAIN; + /* the last fragment has index num_extra_frag - 1; + * comparing against num_extra_frag could never match + * inside this loop, so the last-fragment rate and + * probe flag were never applied */ + if (i == tx->u.tx.num_extra_frag - 1) { + control->tx_rate = tx->u.tx.last_frag_hwrate; + control->rate = tx->u.tx.last_frag_rate; + if (tx->flags & IEEE80211_TXRXD_TXPROBE_LAST_FRAG) + control->flags |= + IEEE80211_TXCTL_RATE_CTRL_PROBE; + else + control->flags &= + ~IEEE80211_TXCTL_RATE_CTRL_PROBE; + } + + ieee80211_dump_frame(local->mdev->name, + "TX to low-level driver", + tx->u.tx.extra_frag[i]); + ret = local->ops->tx(local_to_hw(local), + tx->u.tx.extra_frag[i], + control);
+ if (ret) + return IEEE80211_TX_FRAG_AGAIN; + local->mdev->trans_start = jiffies; + ieee80211_led_tx(local, 1); + tx->u.tx.extra_frag[i] = NULL; + } + kfree(tx->u.tx.extra_frag); + tx->u.tx.extra_frag = NULL; + } + return IEEE80211_TX_OK; +} + +static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb, + struct ieee80211_tx_control *control, int mgmt) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct sta_info *sta; + ieee80211_tx_handler *handler; + struct ieee80211_txrx_data tx; + ieee80211_txrx_result res = TXRX_DROP, res_prepare; + int ret, i; + + WARN_ON(__ieee80211_queue_pending(local, control->queue)); + + if (unlikely(skb->len < 10)) { + dev_kfree_skb(skb); + return 0; + } + + res_prepare = __ieee80211_tx_prepare(&tx, skb, dev, control); + + if (res_prepare == TXRX_DROP) { + dev_kfree_skb(skb); + return 0; + } + + sta = tx.sta; + tx.u.tx.mgmt_interface = mgmt; + tx.u.tx.mode = local->hw.conf.mode; + + if (res_prepare == TXRX_QUEUED) { /* if it was an injected packet */ + res = TXRX_CONTINUE; + } else { + for (handler = local->tx_handlers; *handler != NULL; + handler++) { + res = (*handler)(&tx); + if (res != TXRX_CONTINUE) + break; + } + } + + skb = tx.skb; /* handlers are allowed to change skb */ + + if (sta) + sta_info_put(sta); + + if (unlikely(res == TXRX_DROP)) { + I802_DEBUG_INC(local->tx_handlers_drop); + goto drop; + } + + if (unlikely(res == TXRX_QUEUED)) { + I802_DEBUG_INC(local->tx_handlers_queued); + return 0; + } + + if (tx.u.tx.extra_frag) { + for (i = 0; i < tx.u.tx.num_extra_frag; i++) { + int next_len, dur; + struct ieee80211_hdr *hdr = + (struct ieee80211_hdr *) + tx.u.tx.extra_frag[i]->data; + + if (i + 1 < tx.u.tx.num_extra_frag) { + next_len = tx.u.tx.extra_frag[i + 1]->len; + } else { + next_len = 0; + tx.u.tx.rate = tx.u.tx.last_frag_rate; + tx.u.tx.last_frag_hwrate = tx.u.tx.rate->val; + } + dur = ieee80211_duration(&tx, 0, next_len); + hdr->duration_id = cpu_to_le16(dur); + } + } + +retry: + ret 
= __ieee80211_tx(local, skb, &tx); + if (ret) { + struct ieee80211_tx_stored_packet *store = + &local->pending_packet[control->queue]; + + if (ret == IEEE80211_TX_FRAG_AGAIN) + skb = NULL; + set_bit(IEEE80211_LINK_STATE_PENDING, + &local->state[control->queue]); + smp_mb(); + /* When the driver gets out of buffers during sending of + * fragments and calls ieee80211_stop_queue, there is + * a small window between IEEE80211_LINK_STATE_XOFF and + * IEEE80211_LINK_STATE_PENDING flags are set. If a buffer + * gets available in that window (i.e. driver calls + * ieee80211_wake_queue), we would end up with ieee80211_tx + * called with IEEE80211_LINK_STATE_PENDING. Prevent this by + * continuing transmitting here when that situation is + * possible to have happened. */ + if (!__ieee80211_queue_stopped(local, control->queue)) { + clear_bit(IEEE80211_LINK_STATE_PENDING, + &local->state[control->queue]); + goto retry; + } + memcpy(&store->control, control, + sizeof(struct ieee80211_tx_control)); + store->skb = skb; + store->extra_frag = tx.u.tx.extra_frag; + store->num_extra_frag = tx.u.tx.num_extra_frag; + store->last_frag_hwrate = tx.u.tx.last_frag_hwrate; + store->last_frag_rate = tx.u.tx.last_frag_rate; + store->last_frag_rate_ctrl_probe = + !!(tx.flags & IEEE80211_TXRXD_TXPROBE_LAST_FRAG); + } + return 0; + + drop: + if (skb) + dev_kfree_skb(skb); + for (i = 0; i < tx.u.tx.num_extra_frag; i++) + if (tx.u.tx.extra_frag[i]) + dev_kfree_skb(tx.u.tx.extra_frag[i]); + kfree(tx.u.tx.extra_frag); + return 0; +} + +/* device xmit handlers */ + +int ieee80211_master_start_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ieee80211_tx_control control; + struct ieee80211_tx_packet_data *pkt_data; + struct net_device *odev = NULL; + struct ieee80211_sub_if_data *osdata; + int headroom; + int ret; + + /* + * copy control out of the skb so other people can use skb->cb + */ + pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; + memset(&control, 0, sizeof(struct 
ieee80211_tx_control)); + + if (pkt_data->ifindex) + odev = dev_get_by_index(pkt_data->ifindex); + if (unlikely(odev && !is_ieee80211_device(odev, dev))) { + dev_put(odev); + odev = NULL; + } + if (unlikely(!odev)) { +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: Discarded packet with nonexistent " + "originating device\n", dev->name); +#endif + dev_kfree_skb(skb); + return 0; + } + osdata = IEEE80211_DEV_TO_SUB_IF(odev); + + headroom = osdata->local->tx_headroom + IEEE80211_ENCRYPT_HEADROOM; + if (skb_headroom(skb) < headroom) { + if (pskb_expand_head(skb, headroom, 0, GFP_ATOMIC)) { + dev_kfree_skb(skb); + dev_put(odev); + return 0; + } + } + + control.ifindex = odev->ifindex; + control.type = osdata->type; + if (pkt_data->flags & IEEE80211_TXPD_REQ_TX_STATUS) + control.flags |= IEEE80211_TXCTL_REQ_TX_STATUS; + if (pkt_data->flags & IEEE80211_TXPD_DO_NOT_ENCRYPT) + control.flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT; + if (pkt_data->flags & IEEE80211_TXPD_REQUEUE) + control.flags |= IEEE80211_TXCTL_REQUEUE; + control.queue = pkt_data->queue; + + ret = ieee80211_tx(odev, skb, &control, + control.type == IEEE80211_IF_TYPE_MGMT); + dev_put(odev); + + return ret; +} + +int ieee80211_monitor_start_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_tx_packet_data *pkt_data; + struct ieee80211_radiotap_header *prthdr = + (struct ieee80211_radiotap_header *)skb->data; + u16 len_rthdr; + + /* check for not even having the fixed radiotap header part */ + if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header))) + goto fail; /* too short to be possibly valid */ + + /* is it a header version we can trust to find length from? 
*/ + if (unlikely(prthdr->it_version)) + goto fail; /* only version 0 is supported */ + + /* then there must be a radiotap header with a length we can use */ + len_rthdr = ieee80211_get_radiotap_len(skb->data); + + /* does the skb contain enough to deliver on the alleged length? */ + if (unlikely(skb->len < len_rthdr)) + goto fail; /* skb too short for claimed rt header extent */ + + skb->dev = local->mdev; + + pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; + memset(pkt_data, 0, sizeof(*pkt_data)); + /* needed because we set skb device to master */ + pkt_data->ifindex = dev->ifindex; + + pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT; + + /* + * fix up the pointers accounting for the radiotap + * header still being in there. We are being given + * a precooked IEEE80211 header so no need for + * normal processing + */ + skb_set_mac_header(skb, len_rthdr); + /* + * these are just fixed to the end of the rt area since we + * don't have any better information and at this point, nobody cares + */ + skb_set_network_header(skb, len_rthdr); + skb_set_transport_header(skb, len_rthdr); + + /* pass the radiotap header up to the next stage intact */ + dev_queue_xmit(skb); + return NETDEV_TX_OK; + +fail: + dev_kfree_skb(skb); + return NETDEV_TX_OK; /* meaning, we dealt with the skb */ +} + +/** + * ieee80211_subif_start_xmit - netif start_xmit function for Ethernet-type + * subinterfaces (wlan#, WDS, and VLAN interfaces) + * @skb: packet to be sent + * @dev: incoming interface + * + * Returns: 0 on success (and frees skb in this case) or 1 on failure (skb will + * not be freed, and caller is responsible for either retrying later or freeing + * skb). + * + * This function takes in an Ethernet header and encapsulates it with suitable + * IEEE 802.11 header based on which interface the packet is coming in. The + * encapsulated packet will then be passed to master interface, wlan#.11, for + * transmission (through low-level driver). 
+ */ +int ieee80211_subif_start_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_tx_packet_data *pkt_data; + struct ieee80211_sub_if_data *sdata; + int ret = 1, head_need; + u16 ethertype, hdrlen, fc; + struct ieee80211_hdr hdr; + const u8 *encaps_data; + int encaps_len, skip_header_bytes; + int nh_pos, h_pos; + struct sta_info *sta; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (unlikely(skb->len < ETH_HLEN)) { + printk(KERN_DEBUG "%s: short skb (len=%d)\n", + dev->name, skb->len); + ret = 0; + goto fail; + } + + nh_pos = skb_network_header(skb) - skb->data; + h_pos = skb_transport_header(skb) - skb->data; + + /* convert Ethernet header to proper 802.11 header (based on + * operation mode) */ + ethertype = (skb->data[12] << 8) | skb->data[13]; + /* TODO: handling for 802.1x authorized/unauthorized port */ + fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA; + + switch (sdata->type) { + case IEEE80211_IF_TYPE_AP: + case IEEE80211_IF_TYPE_VLAN: + fc |= IEEE80211_FCTL_FROMDS; + /* DA BSSID SA */ + memcpy(hdr.addr1, skb->data, ETH_ALEN); + memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); + memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN); + hdrlen = 24; + break; + case IEEE80211_IF_TYPE_WDS: + fc |= IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS; + /* RA TA DA SA */ + memcpy(hdr.addr1, sdata->u.wds.remote_addr, ETH_ALEN); + memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); + memcpy(hdr.addr3, skb->data, ETH_ALEN); + memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); + hdrlen = 30; + break; + case IEEE80211_IF_TYPE_STA: + fc |= IEEE80211_FCTL_TODS; + /* BSSID SA DA */ + memcpy(hdr.addr1, sdata->u.sta.bssid, ETH_ALEN); + memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); + memcpy(hdr.addr3, skb->data, ETH_ALEN); + hdrlen = 24; + break; + case IEEE80211_IF_TYPE_IBSS: + /* DA SA BSSID */ + memcpy(hdr.addr1, skb->data, ETH_ALEN); + memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); + 
memcpy(hdr.addr3, sdata->u.sta.bssid, ETH_ALEN); + hdrlen = 24; + break; + default: + ret = 0; + goto fail; + } + + /* receiver is QoS enabled, use a QoS type frame */ + sta = sta_info_get(local, hdr.addr1); + if (sta) { + if (sta->flags & WLAN_STA_WME) { + fc |= IEEE80211_STYPE_QOS_DATA; + hdrlen += 2; + } + sta_info_put(sta); + } + + hdr.frame_control = cpu_to_le16(fc); + hdr.duration_id = 0; + hdr.seq_ctrl = 0; + + skip_header_bytes = ETH_HLEN; + if (ethertype == ETH_P_AARP || ethertype == ETH_P_IPX) { + encaps_data = bridge_tunnel_header; + encaps_len = sizeof(bridge_tunnel_header); + skip_header_bytes -= 2; + } else if (ethertype >= 0x600) { + encaps_data = rfc1042_header; + encaps_len = sizeof(rfc1042_header); + skip_header_bytes -= 2; + } else { + encaps_data = NULL; + encaps_len = 0; + } + + skb_pull(skb, skip_header_bytes); + nh_pos -= skip_header_bytes; + h_pos -= skip_header_bytes; + + /* TODO: implement support for fragments so that there is no need to + * reallocate and copy payload; it might be enough to support one + * extra fragment that would be copied in the beginning of the frame + * data.. anyway, it would be nice to include this into skb structure + * somehow + * + * There are few options for this: + * use skb->cb as an extra space for 802.11 header + * allocate new buffer if not enough headroom + * make sure that there is enough headroom in every skb by increasing + * build in headroom in __dev_alloc_skb() (linux/skbuff.h) and + * alloc_skb() (net/core/skbuff.c) + */ + head_need = hdrlen + encaps_len + local->tx_headroom; + head_need -= skb_headroom(skb); + + /* We are going to modify skb data, so make a copy of it if happens to + * be cloned. This could happen, e.g., with Linux bridge code passing + * us broadcast frames. 
*/ + + if (head_need > 0 || skb_cloned(skb)) { +#if 0 + printk(KERN_DEBUG "%s: need to reallocate buffer for %d bytes " + "of headroom\n", dev->name, head_need); +#endif + + if (skb_cloned(skb)) + I802_DEBUG_INC(local->tx_expand_skb_head_cloned); + else + I802_DEBUG_INC(local->tx_expand_skb_head); + /* Since we have to reallocate the buffer, make sure that there + * is enough room for possible WEP IV/ICV and TKIP (8 bytes + * before payload and 12 after). */ + if (pskb_expand_head(skb, (head_need > 0 ? head_need + 8 : 8), + 12, GFP_ATOMIC)) { + printk(KERN_DEBUG "%s: failed to reallocate TX buffer" + "\n", dev->name); + goto fail; + } + } + + if (encaps_data) { + memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len); + nh_pos += encaps_len; + h_pos += encaps_len; + } + memcpy(skb_push(skb, hdrlen), &hdr, hdrlen); + nh_pos += hdrlen; + h_pos += hdrlen; + + pkt_data = (struct ieee80211_tx_packet_data *)skb->cb; + memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data)); + pkt_data->ifindex = dev->ifindex; + if (sdata->type == IEEE80211_IF_TYPE_MGMT) + pkt_data->flags |= IEEE80211_TXPD_MGMT_IFACE; + + skb->dev = local->mdev; + sdata->stats.tx_packets++; + sdata->stats.tx_bytes += skb->len; + + /* Update skb pointers to various headers since this modified frame + * is going to go through Linux networking code that may potentially + * need things like pointer to IP header. 
*/ + skb_set_mac_header(skb, 0); + skb_set_network_header(skb, nh_pos); + skb_set_transport_header(skb, h_pos); + + dev->trans_start = jiffies; + dev_queue_xmit(skb); + + return 0; + + fail: + if (!ret) + dev_kfree_skb(skb); + + return ret; +} + +/* + * This is the transmit routine for the 802.11 type interfaces + * called by upper layers of the linux networking + * stack when it has a frame to transmit + */ +int ieee80211_mgmt_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ieee80211_sub_if_data *sdata; + struct ieee80211_tx_packet_data *pkt_data; + struct ieee80211_hdr *hdr; + u16 fc; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (skb->len < 10) { + dev_kfree_skb(skb); + return 0; + } + + if (skb_headroom(skb) < sdata->local->tx_headroom) { + if (pskb_expand_head(skb, sdata->local->tx_headroom, + 0, GFP_ATOMIC)) { + dev_kfree_skb(skb); + return 0; + } + } + + hdr = (struct ieee80211_hdr *) skb->data; + fc = le16_to_cpu(hdr->frame_control); + + pkt_data = (struct ieee80211_tx_packet_data *) skb->cb; + memset(pkt_data, 0, sizeof(struct ieee80211_tx_packet_data)); + pkt_data->ifindex = sdata->dev->ifindex; + if (sdata->type == IEEE80211_IF_TYPE_MGMT) + pkt_data->flags |= IEEE80211_TXPD_MGMT_IFACE; + + skb->priority = 20; /* use hardcoded priority for mgmt TX queue */ + skb->dev = sdata->local->mdev; + + /* + * We're using the protocol field of the the frame control header + * to request TX callback for hostapd. BIT(1) is checked. 
+ */ + if ((fc & BIT(1)) == BIT(1)) { + pkt_data->flags |= IEEE80211_TXPD_REQ_TX_STATUS; + fc &= ~BIT(1); + hdr->frame_control = cpu_to_le16(fc); + } + + if (!(fc & IEEE80211_FCTL_PROTECTED)) + pkt_data->flags |= IEEE80211_TXPD_DO_NOT_ENCRYPT; + + sdata->stats.tx_packets++; + sdata->stats.tx_bytes += skb->len; + + dev_queue_xmit(skb); + + return 0; +} + +/* helper functions for pending packets for when queues are stopped */ + +void ieee80211_clear_tx_pending(struct ieee80211_local *local) +{ + int i, j; + struct ieee80211_tx_stored_packet *store; + + for (i = 0; i < local->hw.queues; i++) { + if (!__ieee80211_queue_pending(local, i)) + continue; + store = &local->pending_packet[i]; + kfree_skb(store->skb); + for (j = 0; j < store->num_extra_frag; j++) + kfree_skb(store->extra_frag[j]); + kfree(store->extra_frag); + clear_bit(IEEE80211_LINK_STATE_PENDING, &local->state[i]); + } +} + +void ieee80211_tx_pending(unsigned long data) +{ + struct ieee80211_local *local = (struct ieee80211_local *)data; + struct net_device *dev = local->mdev; + struct ieee80211_tx_stored_packet *store; + struct ieee80211_txrx_data tx; + int i, ret, reschedule = 0; + + netif_tx_lock_bh(dev); + for (i = 0; i < local->hw.queues; i++) { + if (__ieee80211_queue_stopped(local, i)) + continue; + if (!__ieee80211_queue_pending(local, i)) { + reschedule = 1; + continue; + } + store = &local->pending_packet[i]; + tx.u.tx.control = &store->control; + tx.u.tx.extra_frag = store->extra_frag; + tx.u.tx.num_extra_frag = store->num_extra_frag; + tx.u.tx.last_frag_hwrate = store->last_frag_hwrate; + tx.u.tx.last_frag_rate = store->last_frag_rate; + tx.flags = 0; + if (store->last_frag_rate_ctrl_probe) + tx.flags |= IEEE80211_TXRXD_TXPROBE_LAST_FRAG; + ret = __ieee80211_tx(local, store->skb, &tx); + if (ret) { + if (ret == IEEE80211_TX_FRAG_AGAIN) + store->skb = NULL; + } else { + clear_bit(IEEE80211_LINK_STATE_PENDING, + &local->state[i]); + reschedule = 1; + } + } + netif_tx_unlock_bh(dev); + if 
(reschedule) { + if (!ieee80211_qdisc_installed(dev)) { + if (!__ieee80211_queue_stopped(local, 0)) + netif_wake_queue(dev); + } else + netif_schedule(dev); + } +} + +/* functions for drivers to get certain frames */ + +static void ieee80211_beacon_add_tim(struct ieee80211_local *local, + struct ieee80211_if_ap *bss, + struct sk_buff *skb) +{ + u8 *pos, *tim; + int aid0 = 0; + int i, have_bits = 0, n1, n2; + + /* Generate bitmap for TIM only if there are any STAs in power save + * mode. */ + read_lock_bh(&local->sta_lock); + if (atomic_read(&bss->num_sta_ps) > 0) + /* in the hope that this is faster than + * checking byte-for-byte */ + have_bits = !bitmap_empty((unsigned long*)bss->tim, + IEEE80211_MAX_AID+1); + + if (bss->dtim_count == 0) + bss->dtim_count = bss->dtim_period - 1; + else + bss->dtim_count--; + + tim = pos = (u8 *) skb_put(skb, 6); + *pos++ = WLAN_EID_TIM; + *pos++ = 4; + *pos++ = bss->dtim_count; + *pos++ = bss->dtim_period; + + if (bss->dtim_count == 0 && !skb_queue_empty(&bss->ps_bc_buf)) + aid0 = 1; + + if (have_bits) { + /* Find largest even number N1 so that bits numbered 1 through + * (N1 x 8) - 1 in the bitmap are 0 and number N2 so that bits + * (N2 + 1) x 8 through 2007 are 0. 
*/ + n1 = 0; + for (i = 0; i < IEEE80211_MAX_TIM_LEN; i++) { + if (bss->tim[i]) { + n1 = i & 0xfe; + break; + } + } + n2 = n1; + for (i = IEEE80211_MAX_TIM_LEN - 1; i >= n1; i--) { + if (bss->tim[i]) { + n2 = i; + break; + } + } + + /* Bitmap control */ + *pos++ = n1 | aid0; + /* Part Virt Bitmap */ + memcpy(pos, bss->tim + n1, n2 - n1 + 1); + + tim[1] = n2 - n1 + 4; + skb_put(skb, n2 - n1); + } else { + *pos++ = aid0; /* Bitmap control */ + *pos++ = 0; /* Part Virt Bitmap */ + } + read_unlock_bh(&local->sta_lock); +} + +struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, int if_id, + struct ieee80211_tx_control *control) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct sk_buff *skb; + struct net_device *bdev; + struct ieee80211_sub_if_data *sdata = NULL; + struct ieee80211_if_ap *ap = NULL; + struct ieee80211_rate *rate; + struct rate_control_extra extra; + u8 *b_head, *b_tail; + int bh_len, bt_len; + + bdev = dev_get_by_index(if_id); + if (bdev) { + sdata = IEEE80211_DEV_TO_SUB_IF(bdev); + ap = &sdata->u.ap; + dev_put(bdev); + } + + if (!ap || sdata->type != IEEE80211_IF_TYPE_AP || + !ap->beacon_head) { +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + if (net_ratelimit()) + printk(KERN_DEBUG "no beacon data avail for idx=%d " + "(%s)\n", if_id, bdev ? 
bdev->name : "N/A"); +#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + return NULL; + } + + /* Assume we are generating the normal beacon locally */ + b_head = ap->beacon_head; + b_tail = ap->beacon_tail; + bh_len = ap->beacon_head_len; + bt_len = ap->beacon_tail_len; + + skb = dev_alloc_skb(local->tx_headroom + + bh_len + bt_len + 256 /* maximum TIM len */); + if (!skb) + return NULL; + + skb_reserve(skb, local->tx_headroom); + memcpy(skb_put(skb, bh_len), b_head, bh_len); + + ieee80211_include_sequence(sdata, (struct ieee80211_hdr *)skb->data); + + ieee80211_beacon_add_tim(local, ap, skb); + + if (b_tail) { + memcpy(skb_put(skb, bt_len), b_tail, bt_len); + } + + if (control) { + memset(&extra, 0, sizeof(extra)); + extra.mode = local->oper_hw_mode; + + rate = rate_control_get_rate(local, local->mdev, skb, &extra); + if (!rate) { + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: ieee80211_beacon_get: no rate " + "found\n", local->mdev->name); + } + dev_kfree_skb(skb); + return NULL; + } + + control->tx_rate = + ((sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE) && + (rate->flags & IEEE80211_RATE_PREAMBLE2)) ? 
+ rate->val2 : rate->val; + control->antenna_sel_tx = local->hw.conf.antenna_sel_tx; + control->power_level = local->hw.conf.power_level; + control->flags |= IEEE80211_TXCTL_NO_ACK; + control->retry_limit = 1; + control->flags |= IEEE80211_TXCTL_CLEAR_DST_MASK; + } + + ap->num_beacons++; + return skb; +} +EXPORT_SYMBOL(ieee80211_beacon_get); + +void ieee80211_rts_get(struct ieee80211_hw *hw, int if_id, + const void *frame, size_t frame_len, + const struct ieee80211_tx_control *frame_txctl, + struct ieee80211_rts *rts) +{ + const struct ieee80211_hdr *hdr = frame; + u16 fctl; + + fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS; + rts->frame_control = cpu_to_le16(fctl); + rts->duration = ieee80211_rts_duration(hw, if_id, frame_len, frame_txctl); + memcpy(rts->ra, hdr->addr1, sizeof(rts->ra)); + memcpy(rts->ta, hdr->addr2, sizeof(rts->ta)); +} +EXPORT_SYMBOL(ieee80211_rts_get); + +void ieee80211_ctstoself_get(struct ieee80211_hw *hw, int if_id, + const void *frame, size_t frame_len, + const struct ieee80211_tx_control *frame_txctl, + struct ieee80211_cts *cts) +{ + const struct ieee80211_hdr *hdr = frame; + u16 fctl; + + fctl = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTS; + cts->frame_control = cpu_to_le16(fctl); + cts->duration = ieee80211_ctstoself_duration(hw, if_id, frame_len, frame_txctl); + memcpy(cts->ra, hdr->addr1, sizeof(cts->ra)); +} +EXPORT_SYMBOL(ieee80211_ctstoself_get); + +struct sk_buff * +ieee80211_get_buffered_bc(struct ieee80211_hw *hw, int if_id, + struct ieee80211_tx_control *control) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct sk_buff *skb; + struct sta_info *sta; + ieee80211_tx_handler *handler; + struct ieee80211_txrx_data tx; + ieee80211_txrx_result res = TXRX_DROP; + struct net_device *bdev; + struct ieee80211_sub_if_data *sdata; + struct ieee80211_if_ap *bss = NULL; + + bdev = dev_get_by_index(if_id); + if (bdev) { + sdata = IEEE80211_DEV_TO_SUB_IF(bdev); + bss = &sdata->u.ap; + dev_put(bdev); + } + if (!bss || sdata->type 
!= IEEE80211_IF_TYPE_AP || !bss->beacon_head) + return NULL; + + if (bss->dtim_count != 0) + return NULL; /* send buffered bc/mc only after DTIM beacon */ + memset(control, 0, sizeof(*control)); + while (1) { + skb = skb_dequeue(&bss->ps_bc_buf); + if (!skb) + return NULL; + local->total_ps_buffered--; + + if (!skb_queue_empty(&bss->ps_bc_buf) && skb->len >= 2) { + struct ieee80211_hdr *hdr = + (struct ieee80211_hdr *) skb->data; + /* more buffered multicast/broadcast frames ==> set + * MoreData flag in IEEE 802.11 header to inform PS + * STAs */ + hdr->frame_control |= + cpu_to_le16(IEEE80211_FCTL_MOREDATA); + } + + if (ieee80211_tx_prepare(&tx, skb, local->mdev, control) == 0) + break; + dev_kfree_skb_any(skb); + } + sta = tx.sta; + tx.flags |= IEEE80211_TXRXD_TXPS_BUFFERED; + + for (handler = local->tx_handlers; *handler != NULL; handler++) { + res = (*handler)(&tx); + if (res == TXRX_DROP || res == TXRX_QUEUED) + break; + } + dev_put(tx.dev); + skb = tx.skb; /* handlers are allowed to change skb */ + + if (res == TXRX_DROP) { + I802_DEBUG_INC(local->tx_handlers_drop); + dev_kfree_skb(skb); + skb = NULL; + } else if (res == TXRX_QUEUED) { + I802_DEBUG_INC(local->tx_handlers_queued); + skb = NULL; + } + + if (sta) + sta_info_put(sta); + + return skb; +} +EXPORT_SYMBOL(ieee80211_get_buffered_bc); diff -puN /dev/null net/mac80211/util.c --- /dev/null +++ a/net/mac80211/util.c @@ -0,0 +1,488 @@ +/* + * Copyright 2002-2005, Instant802 Networks, Inc. + * Copyright 2005-2006, Devicescape Software, Inc. + * Copyright 2006-2007 Jiri Benc + * Copyright 2007 Johannes Berg + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * utilities for mac80211 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ieee80211_i.h" +#include "ieee80211_rate.h" +#include "wme.h" + +/* privid for wiphys to determine whether they belong to us or not */ +void *mac80211_wiphy_privid = &mac80211_wiphy_privid; + +/* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ +/* Ethernet-II snap header (RFC1042 for most EtherTypes) */ +const unsigned char rfc1042_header[] = + { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 }; + +/* Bridge-Tunnel header (for EtherTypes ETH_P_AARP and ETH_P_IPX) */ +const unsigned char bridge_tunnel_header[] = + { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; + +/* No encapsulation header if EtherType < 0x600 (=length) */ +static const unsigned char eapol_header[] = + { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00, 0x88, 0x8e }; + + +static int rate_list_match(const int *rate_list, int rate) +{ + int i; + + if (!rate_list) + return 0; + + for (i = 0; rate_list[i] >= 0; i++) + if (rate_list[i] == rate) + return 1; + + return 0; +} + +void ieee80211_prepare_rates(struct ieee80211_local *local, + struct ieee80211_hw_mode *mode) +{ + int i; + + for (i = 0; i < mode->num_rates; i++) { + struct ieee80211_rate *rate = &mode->rates[i]; + + rate->flags &= ~(IEEE80211_RATE_SUPPORTED | + IEEE80211_RATE_BASIC); + + if (local->supp_rates[mode->mode]) { + if (!rate_list_match(local->supp_rates[mode->mode], + rate->rate)) + continue; + } + + rate->flags |= IEEE80211_RATE_SUPPORTED; + + /* Use configured basic rate set if it is available. If not, + * use defaults that are sane for most cases. 
*/ + if (local->basic_rates[mode->mode]) { + if (rate_list_match(local->basic_rates[mode->mode], + rate->rate)) + rate->flags |= IEEE80211_RATE_BASIC; + } else switch (mode->mode) { + case MODE_IEEE80211A: + if (rate->rate == 60 || rate->rate == 120 || + rate->rate == 240) + rate->flags |= IEEE80211_RATE_BASIC; + break; + case MODE_IEEE80211B: + if (rate->rate == 10 || rate->rate == 20) + rate->flags |= IEEE80211_RATE_BASIC; + break; + case MODE_ATHEROS_TURBO: + if (rate->rate == 120 || rate->rate == 240 || + rate->rate == 480) + rate->flags |= IEEE80211_RATE_BASIC; + break; + case MODE_IEEE80211G: + if (rate->rate == 10 || rate->rate == 20 || + rate->rate == 55 || rate->rate == 110) + rate->flags |= IEEE80211_RATE_BASIC; + break; + } + + /* Set ERP and MANDATORY flags based on phymode */ + switch (mode->mode) { + case MODE_IEEE80211A: + if (rate->rate == 60 || rate->rate == 120 || + rate->rate == 240) + rate->flags |= IEEE80211_RATE_MANDATORY; + break; + case MODE_IEEE80211B: + if (rate->rate == 10) + rate->flags |= IEEE80211_RATE_MANDATORY; + break; + case MODE_ATHEROS_TURBO: + break; + case MODE_IEEE80211G: + if (rate->rate == 10 || rate->rate == 20 || + rate->rate == 55 || rate->rate == 110 || + rate->rate == 60 || rate->rate == 120 || + rate->rate == 240) + rate->flags |= IEEE80211_RATE_MANDATORY; + break; + } + if (ieee80211_is_erp_rate(mode->mode, rate->rate)) + rate->flags |= IEEE80211_RATE_ERP; + } +} + +u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len) +{ + u16 fc; + + if (len < 24) + return NULL; + + fc = le16_to_cpu(hdr->frame_control); + + switch (fc & IEEE80211_FCTL_FTYPE) { + case IEEE80211_FTYPE_DATA: + switch (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { + case IEEE80211_FCTL_TODS: + return hdr->addr1; + case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): + return NULL; + case IEEE80211_FCTL_FROMDS: + return hdr->addr2; + case 0: + return hdr->addr3; + } + break; + case IEEE80211_FTYPE_MGMT: + return hdr->addr3; + case 
IEEE80211_FTYPE_CTL: + if ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PSPOLL) + return hdr->addr1; + else + return NULL; + } + + return NULL; +} + +int ieee80211_get_hdrlen(u16 fc) +{ + int hdrlen = 24; + + switch (fc & IEEE80211_FCTL_FTYPE) { + case IEEE80211_FTYPE_DATA: + if ((fc & IEEE80211_FCTL_FROMDS) && (fc & IEEE80211_FCTL_TODS)) + hdrlen = 30; /* Addr4 */ + /* + * The QoS Control field is two bytes and its presence is + * indicated by the IEEE80211_STYPE_QOS_DATA bit. Add 2 to + * hdrlen if that bit is set. + * This works by masking out the bit and shifting it to + * bit position 1 so the result has the value 0 or 2. + */ + hdrlen += (fc & IEEE80211_STYPE_QOS_DATA) + >> (ilog2(IEEE80211_STYPE_QOS_DATA)-1); + break; + case IEEE80211_FTYPE_CTL: + /* + * ACK and CTS are 10 bytes, all others 16. To see how + * to get this condition consider + * subtype mask: 0b0000000011110000 (0x00F0) + * ACK subtype: 0b0000000011010000 (0x00D0) + * CTS subtype: 0b0000000011000000 (0x00C0) + * bits that matter: ^^^ (0x00E0) + * value of those: 0b0000000011000000 (0x00C0) + */ + if ((fc & 0xE0) == 0xC0) + hdrlen = 10; + else + hdrlen = 16; + break; + } + + return hdrlen; +} +EXPORT_SYMBOL(ieee80211_get_hdrlen); + +int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb) +{ + const struct ieee80211_hdr *hdr = (const struct ieee80211_hdr *) skb->data; + int hdrlen; + + if (unlikely(skb->len < 10)) + return 0; + hdrlen = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control)); + if (unlikely(hdrlen > skb->len)) + return 0; + return hdrlen; +} +EXPORT_SYMBOL(ieee80211_get_hdrlen_from_skb); + +int ieee80211_is_eapol(const struct sk_buff *skb) +{ + const struct ieee80211_hdr *hdr; + u16 fc; + int hdrlen; + + if (unlikely(skb->len < 10)) + return 0; + + hdr = (const struct ieee80211_hdr *) skb->data; + fc = le16_to_cpu(hdr->frame_control); + + if (unlikely(!WLAN_FC_DATA_PRESENT(fc))) + return 0; + + hdrlen = ieee80211_get_hdrlen(fc); + + if (unlikely(skb->len >= hdrlen + 
sizeof(eapol_header) && + memcmp(skb->data + hdrlen, eapol_header, + sizeof(eapol_header)) == 0)) + return 1; + + return 0; +} + +void ieee80211_tx_set_iswep(struct ieee80211_txrx_data *tx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; + + hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); + if (tx->u.tx.extra_frag) { + struct ieee80211_hdr *fhdr; + int i; + for (i = 0; i < tx->u.tx.num_extra_frag; i++) { + fhdr = (struct ieee80211_hdr *) + tx->u.tx.extra_frag[i]->data; + fhdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); + } + } +} + +int ieee80211_frame_duration(struct ieee80211_local *local, size_t len, + int rate, int erp, int short_preamble) +{ + int dur; + + /* calculate duration (in microseconds, rounded up to next higher + * integer if it includes a fractional microsecond) to send frame of + * len bytes (does not include FCS) at the given rate. Duration will + * also include SIFS. + * + * rate is in 100 kbps, so divident is multiplied by 10 in the + * DIV_ROUND_UP() operations. + */ + + if (local->hw.conf.phymode == MODE_IEEE80211A || erp || + local->hw.conf.phymode == MODE_ATHEROS_TURBO) { + /* + * OFDM: + * + * N_DBPS = DATARATE x 4 + * N_SYM = Ceiling((16+8xLENGTH+6) / N_DBPS) + * (16 = SIGNAL time, 6 = tail bits) + * TXTIME = T_PREAMBLE + T_SIGNAL + T_SYM x N_SYM + Signal Ext + * + * T_SYM = 4 usec + * 802.11a - 17.5.2: aSIFSTime = 16 usec + * 802.11g - 19.8.4: aSIFSTime = 10 usec + + * signal ext = 6 usec + */ + /* FIX: Atheros Turbo may have different (shorter) duration? */ + dur = 16; /* SIFS + signal ext */ + dur += 16; /* 17.3.2.3: T_PREAMBLE = 16 usec */ + dur += 4; /* 17.3.2.3: T_SIGNAL = 4 usec */ + dur += 4 * DIV_ROUND_UP((16 + 8 * (len + 4) + 6) * 10, + 4 * rate); /* T_SYM x N_SYM */ + } else { + /* + * 802.11b or 802.11g with 802.11b compatibility: + * 18.3.4: TXTIME = PreambleLength + PLCPHeaderTime + + * Ceiling(((LENGTH+PBCC)x8)/DATARATE). PBCC=0. 
+ * + * 802.11 (DS): 15.3.3, 802.11b: 18.3.4 + * aSIFSTime = 10 usec + * aPreambleLength = 144 usec or 72 usec with short preamble + * aPLCPHeaderLength = 48 usec or 24 usec with short preamble + */ + dur = 10; /* aSIFSTime = 10 usec */ + dur += short_preamble ? (72 + 24) : (144 + 48); + + dur += DIV_ROUND_UP(8 * (len + 4) * 10, rate); + } + + return dur; +} + +/* Exported duration function for driver use */ +__le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, int if_id, + size_t frame_len, int rate) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct net_device *bdev = dev_get_by_index(if_id); + struct ieee80211_sub_if_data *sdata; + u16 dur; + int erp; + + if (unlikely(!bdev)) + return 0; + + sdata = IEEE80211_DEV_TO_SUB_IF(bdev); + erp = ieee80211_is_erp_rate(hw->conf.phymode, rate); + dur = ieee80211_frame_duration(local, frame_len, rate, + erp, sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE); + + dev_put(bdev); + return cpu_to_le16(dur); +} +EXPORT_SYMBOL(ieee80211_generic_frame_duration); + +__le16 ieee80211_rts_duration(struct ieee80211_hw *hw, int if_id, + size_t frame_len, + const struct ieee80211_tx_control *frame_txctl) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_rate *rate; + struct net_device *bdev = dev_get_by_index(if_id); + struct ieee80211_sub_if_data *sdata; + int short_preamble; + int erp; + u16 dur; + + if (unlikely(!bdev)) + return 0; + + sdata = IEEE80211_DEV_TO_SUB_IF(bdev); + short_preamble = sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE; + + rate = frame_txctl->rts_rate; + erp = !!(rate->flags & IEEE80211_RATE_ERP); + + /* CTS duration */ + dur = ieee80211_frame_duration(local, 10, rate->rate, + erp, short_preamble); + /* Data frame duration */ + dur += ieee80211_frame_duration(local, frame_len, rate->rate, + erp, short_preamble); + /* ACK duration */ + dur += ieee80211_frame_duration(local, 10, rate->rate, + erp, short_preamble); + + dev_put(bdev); + return cpu_to_le16(dur); +} 
+EXPORT_SYMBOL(ieee80211_rts_duration);
+/* Duration value for a CTS-to-self frame: time of the frame_len-byte frame plus, unless NO_ACK is set, of its ACK. */
+__le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, int if_id, /* if_id: index handed to dev_get_by_index() */
+				    size_t frame_len, /* length used for the data-frame term */
+				    const struct ieee80211_tx_control *frame_txctl) /* supplies rts_rate and flags */
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct ieee80211_rate *rate;
+	struct net_device *bdev = dev_get_by_index(if_id); /* paired with dev_put() below */
+	struct ieee80211_sub_if_data *sdata;
+	int short_preamble; /* raw masked bit, not 0/1; ieee80211_frame_duration() only tests it for non-zero */
+	int erp; /* normalised to 0/1 by the !! below */
+	u16 dur;
+	/* No device for if_id: return a zero duration; nothing was obtained, so there is nothing to dev_put(). */
+	if (unlikely(!bdev))
+		return 0;
+	/* The short-preamble flag is read from the sub_if_data of the device that if_id names. */
+	sdata = IEEE80211_DEV_TO_SUB_IF(bdev);
+	short_preamble = sdata->flags & IEEE80211_SDATA_SHORT_PREAMBLE;
+	/* Both terms below are computed at the RTS rate carried in the tx control. */
+	rate = frame_txctl->rts_rate;
+	erp = !!(rate->flags & IEEE80211_RATE_ERP);
+
+	/* Data frame duration (ieee80211_frame_duration() works in microseconds and includes SIFS) */
+	dur = ieee80211_frame_duration(local, frame_len, rate->rate,
+				       erp, short_preamble);
+	if (!(frame_txctl->flags & IEEE80211_TXCTL_NO_ACK)) {
+		/* ACK duration; 10 is the ACK frame length (see the ACK/CTS note in ieee80211_get_hdrlen()) */
+		dur += ieee80211_frame_duration(local, 10, rate->rate,
+						erp, short_preamble);
+	}
+
+	dev_put(bdev);
+	return cpu_to_le16(dur); /* little-endian; ieee80211_ctstoself_get() stores it in cts->duration */
+}
+EXPORT_SYMBOL(ieee80211_ctstoself_duration);
+/* Find the rate entry whose val (or, with IEEE80211_RATE_PREAMBLE2, val2) equals hw_rate; NULL if none. */
+struct ieee80211_rate *
+ieee80211_get_rate(struct ieee80211_local *local, int phymode, int hw_rate) /* phymode selects which modes are searched */
+{
+	struct ieee80211_hw_mode *mode;
+	int r;
+	/* Only local->modes_list entries whose ->mode equals phymode are examined. */
+	list_for_each_entry(mode, &local->modes_list, list) {
+		if (mode->mode != phymode)
+			continue;
+		for (r = 0; r < mode->num_rates; r++) {
+			struct ieee80211_rate *rate = &mode->rates[r];
+			if (rate->val == hw_rate || /* primary value */
+			    (rate->flags & IEEE80211_RATE_PREAMBLE2 && /* val2 is considered only when this flag is set */
+			     rate->val2 == hw_rate))
+				return rate; /* first match wins */
+		}
+	}
+	/* Every matching mode was searched without a hit. */
+	return NULL;
+}
+/* Clear the XOFF bit of one queue and, only if it had been set, kick whatever resumes transmission. */
+void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	/* Nothing below runs when XOFF was already clear. */
+	if (test_and_clear_bit(IEEE80211_LINK_STATE_XOFF,
+			       &local->state[queue])) {
+		if (test_bit(IEEE80211_LINK_STATE_PENDING, /* set by ieee80211_tx() when it stores a packet for this queue */
+			     &local->state[queue]))
+			tasklet_schedule(&local->tx_pending_tasklet);
+		else /* the if/else pair that follows is the entire body of this else */
+			if (!ieee80211_qdisc_installed(local->mdev)) {
+				if (queue == 0) /* mirrors ieee80211_stop_queue(), which stops mdev only for queue 0 */
+					netif_wake_queue(local->mdev);
+			} else
+				__netif_schedule(local->mdev);
+	}
+}
+EXPORT_SYMBOL(ieee80211_wake_queue);
+/* Mark one queue as stopped by setting its XOFF bit in local->state[]. */
+void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	/* When ieee80211_qdisc_installed() is false, stopping queue 0 also stops local->mdev's queue. */
+	if (!ieee80211_qdisc_installed(local->mdev) && queue == 0)
+		netif_stop_queue(local->mdev);
+	set_bit(IEEE80211_LINK_STATE_XOFF, &local->state[queue]); /* cleared by ieee80211_wake_queue() and ieee80211_start_queues() */
+}
+EXPORT_SYMBOL(ieee80211_stop_queue);
+/* Clear XOFF on every queue; unlike ieee80211_wake_queue(), the PENDING bit is not examined here. */
+void ieee80211_start_queues(struct ieee80211_hw *hw)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	int i;
+	/* local->hw.queues bounds the loop over local->state[]. */
+	for (i = 0; i < local->hw.queues; i++)
+		clear_bit(IEEE80211_LINK_STATE_XOFF, &local->state[i]);
+	if (!ieee80211_qdisc_installed(local->mdev)) /* local->mdev's queue is started only when this is false */
+		netif_start_queue(local->mdev);
+}
+EXPORT_SYMBOL(ieee80211_start_queues);
+/* Call ieee80211_stop_queue() for each queue index below hw->queues. */
+void ieee80211_stop_queues(struct ieee80211_hw *hw)
+{
+	int i;
+
+	for (i = 0; i < hw->queues; i++)
+		ieee80211_stop_queue(hw, i);
+}
+EXPORT_SYMBOL(ieee80211_stop_queues);
+/* Call ieee80211_wake_queue() for each queue index below hw->queues; queues whose XOFF bit is clear are left alone by it. */
+void ieee80211_wake_queues(struct ieee80211_hw *hw)
+{
+	int i;
+
+	for (i = 0; i < hw->queues; i++)
+		ieee80211_wake_queue(hw, i);
+}
+EXPORT_SYMBOL(ieee80211_wake_queues);
diff -puN net/mac80211/wep.c~git-net net/mac80211/wep.c --- a/net/mac80211/wep.c~git-net +++ a/net/mac80211/wep.c @@ -67,7 +67,7 @@ void ieee80211_wep_get_iv(struct ieee802 struct ieee80211_key *key, u8 *iv) { local->wep_iv++; - if (ieee80211_wep_weak_iv(local->wep_iv, key->keylen)) + if (ieee80211_wep_weak_iv(local->wep_iv, key->conf.keylen)) local->wep_iv += 0x0100; if (!iv) @@ -76,7 +76,7 @@ void ieee80211_wep_get_iv(struct ieee802 *iv++ = (local->wep_iv >> 16) & 0xff; *iv++ = (local->wep_iv >> 8) & 0xff; *iv++ = local->wep_iv & 0xff; - *iv++ = key->keyidx << 6; + *iv++ = key->conf.keyidx << 6; } @@ -159,10 +159,10 @@ int ieee80211_wep_encrypt(struct ieee802 u8 *rc4key, *iv; size_t len; - if (!key || key->alg != ALG_WEP) + if (!key || key->conf.alg != ALG_WEP) return -1; - klen = 3 + key->keylen; + klen = 3 + key->conf.keylen; rc4key = kmalloc(klen, GFP_ATOMIC); if (!rc4key) return -1; @@ -179,7 +179,7 @@ int
ieee80211_wep_encrypt(struct ieee802 memcpy(rc4key, iv, 3); /* Copy rest of the WEP key (the secret part) */ - memcpy(rc4key + 3, key->key, key->keylen); + memcpy(rc4key + 3, key->conf.key, key->conf.keylen); /* Add room for ICV */ skb_put(skb, WEP_ICV_LEN); @@ -251,10 +251,10 @@ int ieee80211_wep_decrypt(struct ieee802 keyidx = skb->data[hdrlen + 3] >> 6; - if (!key || keyidx != key->keyidx || key->alg != ALG_WEP) + if (!key || keyidx != key->conf.keyidx || key->conf.alg != ALG_WEP) return -1; - klen = 3 + key->keylen; + klen = 3 + key->conf.keylen; rc4key = kmalloc(klen, GFP_ATOMIC); if (!rc4key) @@ -264,7 +264,7 @@ int ieee80211_wep_decrypt(struct ieee802 memcpy(rc4key, skb->data + hdrlen, 3); /* Copy rest of the WEP key (the secret part) */ - memcpy(rc4key + 3, key->key, key->keylen); + memcpy(rc4key + 3, key->conf.key, key->conf.keylen); if (ieee80211_wep_decrypt_data(local->wep_rx_tfm, rc4key, klen, skb->data + hdrlen + WEP_IV_LEN, @@ -321,7 +321,7 @@ u8 * ieee80211_wep_is_weak_iv(struct sk_ ivpos = skb->data + hdrlen; iv = (ivpos[0] << 16) | (ivpos[1] << 8) | ivpos[2]; - if (ieee80211_wep_weak_iv(iv, key->keylen)) + if (ieee80211_wep_weak_iv(iv, key->conf.keylen)) return ivpos; return NULL; diff -puN net/mac80211/wme.c~git-net net/mac80211/wme.c --- a/net/mac80211/wme.c~git-net +++ a/net/mac80211/wme.c @@ -18,70 +18,6 @@ #include "ieee80211_i.h" #include "wme.h" -static inline int WLAN_FC_IS_QOS_DATA(u16 fc) -{ - return (fc & 0x8C) == 0x88; -} - - -ieee80211_txrx_result -ieee80211_rx_h_parse_qos(struct ieee80211_txrx_data *rx) -{ - u8 *data = rx->skb->data; - int tid; - - /* does the frame have a qos control field? 
*/ - if (WLAN_FC_IS_QOS_DATA(rx->fc)) { - u8 *qc = data + ieee80211_get_hdrlen(rx->fc) - QOS_CONTROL_LEN; - /* frame has qos control */ - tid = qc[0] & QOS_CONTROL_TID_MASK; - } else { - if (unlikely((rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT)) { - /* Separate TID for management frames */ - tid = NUM_RX_DATA_QUEUES - 1; - } else { - /* no qos control present */ - tid = 0; /* 802.1d - Best Effort */ - } - } -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS - I802_DEBUG_INC(rx->local->wme_rx_queue[tid]); - if (rx->sta) { - I802_DEBUG_INC(rx->sta->wme_rx_queue[tid]); - } -#endif /* CONFIG_MAC80211_DEBUG_COUNTERS */ - - rx->u.rx.queue = tid; - /* Set skb->priority to 1d tag if highest order bit of TID is not set. - * For now, set skb->priority to 0 for other cases. */ - rx->skb->priority = (tid > 7) ? 0 : tid; - - return TXRX_CONTINUE; -} - - -ieee80211_txrx_result -ieee80211_rx_h_remove_qos_control(struct ieee80211_txrx_data *rx) -{ - u16 fc = rx->fc; - u8 *data = rx->skb->data; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) data; - - if (!WLAN_FC_IS_QOS_DATA(fc)) - return TXRX_CONTINUE; - - /* remove the qos control field, update frame type and meta-data */ - memmove(data + 2, data, ieee80211_get_hdrlen(fc) - 2); - hdr = (struct ieee80211_hdr *) skb_pull(rx->skb, 2); - /* change frame type to non QOS */ - rx->fc = fc &= ~IEEE80211_STYPE_QOS_DATA; - hdr->frame_control = cpu_to_le16(fc); - - return TXRX_CONTINUE; -} - - -#ifdef CONFIG_NET_SCHED /* maximum number of hardware queues we support. */ #define TC_80211_MAX_QUEUES 8 @@ -172,7 +108,7 @@ static inline int classify80211(struct s return IEEE80211_TX_QUEUE_DATA0; } - if (unlikely(pkt_data->mgmt_iface)) { + if (unlikely(pkt_data->flags & IEEE80211_TXPD_MGMT_IFACE)) { /* Data frames from hostapd (mainly, EAPOL) use AC_VO * and they will include QoS control fields if * the target STA is using WME. 
*/ @@ -189,14 +125,13 @@ static inline int classify80211(struct s } /* use the data classifier to determine what 802.1d tag the - * data frame has */ + * data frame has */ skb->priority = classify_1d(skb, qd); - /* incase we are a client verify acm is not set for this ac */ + /* in case we are a client verify acm is not set for this ac */ while (unlikely(local->wmm_acm & BIT(skb->priority))) { if (wme_downgrade_ac(skb)) { - /* No AC with lower priority has acm=0, - * drop packet. */ + /* No AC with lower priority has acm=0, drop packet. */ return -1; } } @@ -217,7 +152,7 @@ static int wme_qdiscop_enqueue(struct sk struct Qdisc *qdisc; int err, queue; - if (pkt_data->requeue) { + if (pkt_data->flags & IEEE80211_TXPD_REQUEUE) { skb_queue_tail(&q->requeued[pkt_data->queue], skb); qd->q.qlen++; return 0; @@ -675,4 +610,3 @@ void ieee80211_wme_unregister(void) { unregister_qdisc(&wme_qdisc_ops); } -#endif /* CONFIG_NET_SCHED */ diff -puN net/mac80211/wme.h~git-net net/mac80211/wme.h --- a/net/mac80211/wme.h~git-net +++ a/net/mac80211/wme.h @@ -24,11 +24,10 @@ #define QOS_CONTROL_TAG1D_MASK 0x07 -ieee80211_txrx_result -ieee80211_rx_h_parse_qos(struct ieee80211_txrx_data *rx); - -ieee80211_txrx_result -ieee80211_rx_h_remove_qos_control(struct ieee80211_txrx_data *rx); +static inline int WLAN_FC_IS_QOS_DATA(u16 fc) +{ + return (fc & 0x8C) == 0x88; +} #ifdef CONFIG_NET_SCHED void ieee80211_install_qdisc(struct net_device *dev); diff -puN net/mac80211/wpa.c~git-net net/mac80211/wpa.c --- a/net/mac80211/wpa.c~git-net +++ a/net/mac80211/wpa.c @@ -11,10 +11,8 @@ #include #include #include -#include - #include -#include "ieee80211_common.h" + #include "ieee80211_i.h" #include "michael.h" #include "tkip.h" @@ -84,15 +82,15 @@ ieee80211_tx_h_michael_mic_add(struct ie fc = tx->fc; - if (!tx->key || tx->key->alg != ALG_TKIP || skb->len < 24 || + if (!tx->key || tx->key->conf.alg != ALG_TKIP || skb->len < 24 || !WLAN_FC_DATA_PRESENT(fc)) return TXRX_CONTINUE; if 
(ieee80211_get_hdr_info(skb, &sa, &da, &qos_tid, &data, &data_len)) return TXRX_DROP; - if (!tx->key->force_sw_encrypt && - !tx->fragmented && + if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && + !(tx->flags & IEEE80211_TXRXD_FRAGMENTED) && !(tx->local->hw.flags & IEEE80211_HW_TKIP_INCLUDE_MMIC) && !wpa_test) { /* hwaccel - with no need for preallocated room for Michael MIC @@ -116,8 +114,8 @@ ieee80211_tx_h_michael_mic_add(struct ie #else authenticator = 1; #endif - key = &tx->key->key[authenticator ? ALG_TKIP_TEMP_AUTH_TX_MIC_KEY : - ALG_TKIP_TEMP_AUTH_RX_MIC_KEY]; + key = &tx->key->conf.key[authenticator ? ALG_TKIP_TEMP_AUTH_TX_MIC_KEY : + ALG_TKIP_TEMP_AUTH_RX_MIC_KEY]; mic = skb_put(skb, MICHAEL_MIC_LEN); michael_mic(key, da, sa, qos_tid & 0x0f, data, data_len, mic); @@ -137,17 +135,18 @@ ieee80211_rx_h_michael_mic_verify(struct fc = rx->fc; - /* If device handles decryption totally, skip this check */ - if ((rx->local->hw.flags & IEEE80211_HW_DEVICE_HIDES_WEP) || - (rx->local->hw.flags & IEEE80211_HW_DEVICE_STRIPS_MIC)) + /* + * No way to verify the MIC if the hardware stripped it + */ + if (rx->local->hw.flags & IEEE80211_HW_DEVICE_STRIPS_MIC) return TXRX_CONTINUE; - if (!rx->key || rx->key->alg != ALG_TKIP || + if (!rx->key || rx->key->conf.alg != ALG_TKIP || !(rx->fc & IEEE80211_FCTL_PROTECTED) || !WLAN_FC_DATA_PRESENT(fc)) return TXRX_CONTINUE; if ((rx->u.rx.status->flag & RX_FLAG_DECRYPTED) && - !rx->key->force_sw_encrypt) { + (rx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) { if (rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) { if (skb->len < MICHAEL_MIC_LEN) return TXRX_DROP; @@ -155,7 +154,7 @@ ieee80211_rx_h_michael_mic_verify(struct /* Need to verify Michael MIC sometimes in software even when * hwaccel is used. Atheros ar5212: fragmented frames and QoS * frames. 
*/ - if (!rx->fragmented && !wpa_test) + if (!(rx->flags & IEEE80211_TXRXD_FRAGMENTED) && !wpa_test) goto remove_mic; } @@ -170,43 +169,19 @@ ieee80211_rx_h_michael_mic_verify(struct #else authenticator = 1; #endif - key = &rx->key->key[authenticator ? ALG_TKIP_TEMP_AUTH_RX_MIC_KEY : - ALG_TKIP_TEMP_AUTH_TX_MIC_KEY]; + key = &rx->key->conf.key[authenticator ? ALG_TKIP_TEMP_AUTH_RX_MIC_KEY : + ALG_TKIP_TEMP_AUTH_TX_MIC_KEY]; michael_mic(key, da, sa, qos_tid & 0x0f, data, data_len, mic); if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0 || wpa_test) { - if (!rx->u.rx.ra_match) + if (!(rx->flags & IEEE80211_TXRXD_RXRA_MATCH)) return TXRX_DROP; printk(KERN_DEBUG "%s: invalid Michael MIC in data frame from " MAC_FMT "\n", rx->dev->name, MAC_ARG(sa)); - do { - struct ieee80211_hdr *hdr; - union iwreq_data wrqu; - char *buf = kmalloc(128, GFP_ATOMIC); - if (!buf) - break; - - /* TODO: needed parameters: count, key type, TSC */ - hdr = (struct ieee80211_hdr *) skb->data; - sprintf(buf, "MLME-MICHAELMICFAILURE.indication(" - "keyid=%d %scast addr=" MAC_FMT ")", - rx->key->keyidx, - hdr->addr1[0] & 0x01 ? "broad" : "uni", - MAC_ARG(hdr->addr2)); - memset(&wrqu, 0, sizeof(wrqu)); - wrqu.data.length = strlen(buf); - wireless_send_event(rx->dev, IWEVCUSTOM, &wrqu, buf); - kfree(buf); - } while (0); - - if (!rx->local->apdev) - return TXRX_DROP; - - ieee80211_rx_mgmt(rx->local, rx->skb, rx->u.rx.status, - ieee80211_msg_michael_mic_failure); - - return TXRX_QUEUED; + mac80211_ev_michael_mic_failure(rx->dev, rx->key->conf.keyidx, + (void *) skb->data); + return TXRX_DROP; } remove_mic: @@ -230,7 +205,11 @@ static int tkip_encrypt_skb(struct ieee8 hdrlen = ieee80211_get_hdrlen(fc); len = skb->len - hdrlen; - tailneed = !tx->key->force_sw_encrypt ? 
0 : TKIP_ICV_LEN; + if (tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) + tailneed = 0; + else + tailneed = TKIP_ICV_LEN; + if ((skb_headroom(skb) < TKIP_IV_LEN || skb_tailroom(skb) < tailneed)) { I802_DEBUG_INC(tx->local->tx_expand_skb_head); @@ -248,7 +227,7 @@ static int tkip_encrypt_skb(struct ieee8 if (key->u.tkip.iv16 == 0) key->u.tkip.iv32++; - if (!tx->key->force_sw_encrypt) { + if (tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { u32 flags = tx->local->hw.flags; hdr = (struct ieee80211_hdr *)skb->data; @@ -275,7 +254,7 @@ static int tkip_encrypt_skb(struct ieee8 ~IEEE80211_TXCTL_TKIP_NEW_PHASE1_KEY; } - tx->u.tx.control->key_idx = tx->key->hw_key_idx; + tx->u.tx.control->key_idx = tx->key->conf.hw_key_idx; return 0; } @@ -300,18 +279,18 @@ ieee80211_tx_h_tkip_encrypt(struct ieee8 fc = le16_to_cpu(hdr->frame_control); - if (!key || key->alg != ALG_TKIP || !WLAN_FC_DATA_PRESENT(fc)) + if (!key || key->conf.alg != ALG_TKIP || !WLAN_FC_DATA_PRESENT(fc)) return TXRX_CONTINUE; tx->u.tx.control->icv_len = TKIP_ICV_LEN; tx->u.tx.control->iv_len = TKIP_IV_LEN; ieee80211_tx_set_iswep(tx); - if (!tx->key->force_sw_encrypt && + if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && !(tx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV) && !wpa_test) { /* hwaccel - with no need for preallocated room for IV/ICV */ - tx->u.tx.control->key_idx = tx->key->hw_key_idx; + tx->u.tx.control->key_idx = tx->key->conf.hw_key_idx; return TXRX_CONTINUE; } @@ -343,7 +322,7 @@ ieee80211_rx_h_tkip_decrypt(struct ieee8 fc = le16_to_cpu(hdr->frame_control); hdrlen = ieee80211_get_hdrlen(fc); - if (!rx->key || rx->key->alg != ALG_TKIP || + if (!rx->key || rx->key->conf.alg != ALG_TKIP || !(rx->fc & IEEE80211_FCTL_PROTECTED) || (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA) return TXRX_CONTINUE; @@ -352,7 +331,7 @@ ieee80211_rx_h_tkip_decrypt(struct ieee8 return TXRX_DROP; if ((rx->u.rx.status->flag & RX_FLAG_DECRYPTED) && - !rx->key->force_sw_encrypt) { + (key->flags & 
KEY_FLAG_UPLOADED_TO_HARDWARE)) { if (!(rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV)) { /* Hardware takes care of all processing, including * replay protection, so no need to continue here. */ @@ -496,7 +475,10 @@ static int ccmp_encrypt_skb(struct ieee8 hdrlen = ieee80211_get_hdrlen(fc); len = skb->len - hdrlen; - tailneed = !key->force_sw_encrypt ? 0 : CCMP_MIC_LEN; + if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) + tailneed = 0; + else + tailneed = CCMP_MIC_LEN; if ((skb_headroom(skb) < CCMP_HDR_LEN || skb_tailroom(skb) < tailneed)) { @@ -520,11 +502,11 @@ static int ccmp_encrypt_skb(struct ieee8 break; } - ccmp_pn2hdr(pos, pn, key->keyidx); + ccmp_pn2hdr(pos, pn, key->conf.keyidx); - if (!key->force_sw_encrypt) { + if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { /* hwaccel - with preallocated room for CCMP header */ - tx->u.tx.control->key_idx = key->hw_key_idx; + tx->u.tx.control->key_idx = key->conf.hw_key_idx; return 0; } @@ -548,18 +530,18 @@ ieee80211_tx_h_ccmp_encrypt(struct ieee8 fc = le16_to_cpu(hdr->frame_control); - if (!key || key->alg != ALG_CCMP || !WLAN_FC_DATA_PRESENT(fc)) + if (!key || key->conf.alg != ALG_CCMP || !WLAN_FC_DATA_PRESENT(fc)) return TXRX_CONTINUE; tx->u.tx.control->icv_len = CCMP_MIC_LEN; tx->u.tx.control->iv_len = CCMP_HDR_LEN; ieee80211_tx_set_iswep(tx); - if (!tx->key->force_sw_encrypt && + if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && !(tx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV)) { /* hwaccel - with no need for preallocated room for CCMP " * header or MIC fields */ - tx->u.tx.control->key_idx = tx->key->hw_key_idx; + tx->u.tx.control->key_idx = tx->key->conf.hw_key_idx; return TXRX_CONTINUE; } @@ -568,7 +550,6 @@ ieee80211_tx_h_ccmp_encrypt(struct ieee8 if (tx->u.tx.extra_frag) { int i; - for (i = 0; i < tx->u.tx.num_extra_frag; i++) { if (ccmp_encrypt_skb(tx, tx->u.tx.extra_frag[i], test) < 0) @@ -594,7 +575,7 @@ ieee80211_rx_h_ccmp_decrypt(struct ieee8 fc = le16_to_cpu(hdr->frame_control); hdrlen = 
ieee80211_get_hdrlen(fc); - if (!key || key->alg != ALG_CCMP || + if (!key || key->conf.alg != ALG_CCMP || !(rx->fc & IEEE80211_FCTL_PROTECTED) || (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA) return TXRX_CONTINUE; @@ -604,7 +585,7 @@ ieee80211_rx_h_ccmp_decrypt(struct ieee8 return TXRX_DROP; if ((rx->u.rx.status->flag & RX_FLAG_DECRYPTED) && - !key->force_sw_encrypt && + (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && !(rx->local->hw.flags & IEEE80211_HW_WEP_INCLUDE_IV)) return TXRX_CONTINUE; @@ -625,7 +606,7 @@ ieee80211_rx_h_ccmp_decrypt(struct ieee8 } if ((rx->u.rx.status->flag & RX_FLAG_DECRYPTED) && - !key->force_sw_encrypt) { + (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) { /* hwaccel has already decrypted frame and verified MIC */ } else { u8 *scratch, *b_0, *aad; diff -puN net/netlink/af_netlink.c~git-net net/netlink/af_netlink.c --- a/net/netlink/af_netlink.c~git-net +++ a/net/netlink/af_netlink.c @@ -88,7 +88,7 @@ struct netlink_sock { static inline struct netlink_sock *nlk_sk(struct sock *sk) { - return (struct netlink_sock *)sk; + return container_of(sk, struct netlink_sock, sk); } struct nl_pid_hash { diff -puN net/sched/sch_generic.c~git-net net/sched/sch_generic.c --- a/net/sched/sch_generic.c~git-net +++ a/net/sched/sch_generic.c @@ -256,6 +256,12 @@ static void dev_watchdog_down(struct net netif_tx_unlock_bh(dev); } +/** + * netif_carrier_on - set carrier + * @dev: network device + * + * Device has detected acquisition of carrier. + */ void netif_carrier_on(struct net_device *dev) { if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) @@ -264,6 +270,12 @@ void netif_carrier_on(struct net_device __netdev_watchdog_up(dev); } +/** + * netif_carrier_off - clear carrier + * @dev: network device + * + * Device has detected loss of carrier.
+ */ void netif_carrier_off(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) diff -puN net/sctp/endpointola.c~git-net net/sctp/endpointola.c --- a/net/sctp/endpointola.c~git-net +++ a/net/sctp/endpointola.c @@ -103,6 +103,7 @@ static struct sctp_endpoint *sctp_endpoi /* Use SCTP specific send buffer space queues. */ ep->sndbuf_policy = sctp_sndbuf_policy; + sk->sk_write_space = sctp_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); diff -puN net/sctp/ipv6.c~git-net net/sctp/ipv6.c --- a/net/sctp/ipv6.c~git-net +++ a/net/sctp/ipv6.c @@ -481,7 +481,7 @@ static int sctp_v6_cmp_addr(const union if (addr1->sa.sa_family != addr2->sa.sa_family) { if (addr1->sa.sa_family == AF_INET && addr2->sa.sa_family == AF_INET6 && - IPV6_ADDR_MAPPED == ipv6_addr_type(&addr2->v6.sin6_addr)) { + ipv6_addr_v4mapped(&addr2->v6.sin6_addr)) { if (addr2->v6.sin6_port == addr1->v4.sin_port && addr2->v6.sin6_addr.s6_addr32[3] == addr1->v4.sin_addr.s_addr) @@ -489,7 +489,7 @@ static int sctp_v6_cmp_addr(const union } if (addr2->sa.sa_family == AF_INET && addr1->sa.sa_family == AF_INET6 && - IPV6_ADDR_MAPPED == ipv6_addr_type(&addr1->v6.sin6_addr)) { + ipv6_addr_v4mapped(&addr1->v6.sin6_addr)) { if (addr1->v6.sin6_port == addr2->v4.sin_port && addr1->v6.sin6_addr.s6_addr32[3] == addr2->v4.sin_addr.s_addr) diff -puN net/sctp/protocol.c~git-net net/sctp/protocol.c --- a/net/sctp/protocol.c~git-net +++ a/net/sctp/protocol.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -82,6 +83,10 @@ static struct sctp_af *sctp_af_v6_specif struct kmem_cache *sctp_chunk_cachep __read_mostly; struct kmem_cache *sctp_bucket_cachep __read_mostly; +extern int sysctl_sctp_mem[3]; +extern int sysctl_sctp_rmem[3]; +extern int sysctl_sctp_wmem[3]; + /* Return the address of the control sock. 
*/ struct sock *sctp_get_ctl_sock(void) { @@ -969,6 +974,8 @@ SCTP_STATIC __init int sctp_init(void) int i; int status = -EINVAL; unsigned long goal; + unsigned long limit; + int max_share; int order; /* SCTP_DEBUG sanity check. */ @@ -1059,6 +1066,31 @@ SCTP_STATIC __init int sctp_init(void) /* Initialize handle used for association ids. */ idr_init(&sctp_assocs_id); + /* Set the pressure threshold to be a fraction of global memory that + * is up to 1/2 at 256 MB, decreasing toward zero with the amount of + * memory, with a floor of 128 pages. + * Note this initializes the data in sctpv6_prot too + * Unabashedly stolen from tcp_init + */ + limit = min(num_physpages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); + limit = (limit * (num_physpages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); + limit = max(limit, 128UL); + sysctl_sctp_mem[0] = limit / 4 * 3; + sysctl_sctp_mem[1] = limit; + sysctl_sctp_mem[2] = sysctl_sctp_mem[0] * 2; + + /* Set per-socket limits to no more than 1/128 the pressure threshold */ + limit = (sysctl_sctp_mem[1]) << (PAGE_SHIFT - 7); + max_share = min(4UL*1024*1024, limit); + + sysctl_sctp_rmem[0] = PAGE_SIZE; /* give each asoc 1 page min */ + sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1)); + sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share); + + sysctl_sctp_wmem[0] = SK_STREAM_MEM_QUANTUM; + sysctl_sctp_wmem[1] = 16*1024; + sysctl_sctp_wmem[2] = max(64*1024, max_share); + /* Size and allocate the association hash table. * The methodology is similar to that of the tcp hash tables.
*/ diff -puN net/sctp/sm_statefuns.c~git-net net/sctp/sm_statefuns.c --- a/net/sctp/sm_statefuns.c~git-net +++ a/net/sctp/sm_statefuns.c @@ -5263,10 +5263,8 @@ static int sctp_eat_data(const struct sc sctp_verb_t deliver; int tmp; __u32 tsn; - int account_value; struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map; struct sock *sk = asoc->base.sk; - int rcvbuf_over = 0; data_hdr = chunk->subh.data_hdr = (sctp_datahdr_t *)chunk->skb->data; skb_pull(chunk->skb, sizeof(sctp_datahdr_t)); @@ -5276,48 +5274,6 @@ static int sctp_eat_data(const struct sc /* ASSERT: Now skb->data is really the user data. */ - /* - * If we are established, and we have used up our receive buffer - * memory, think about droping the frame. - * Note that we have an opportunity to improve performance here. - * If we accept one chunk from an skbuff, we have to keep all the - * memory of that skbuff around until the chunk is read into user - * space. Therefore, once we accept 1 chunk we may as well accept all - * remaining chunks in the skbuff. The data_accepted flag helps us do - * that. - */ - if ((asoc->state == SCTP_STATE_ESTABLISHED) && (!chunk->data_accepted)) { - /* - * If the receive buffer policy is 1, then each - * association can allocate up to sk_rcvbuf bytes - * otherwise, all the associations in aggregate - * may allocate up to sk_rcvbuf bytes - */ - if (asoc->ep->rcvbuf_policy) - account_value = atomic_read(&asoc->rmem_alloc); - else - account_value = atomic_read(&sk->sk_rmem_alloc); - if (account_value > sk->sk_rcvbuf) { - /* - * We need to make forward progress, even when we are - * under memory pressure, so we always allow the - * next tsn after the ctsn ack point to be accepted. - * This lets us avoid deadlocks in which we have to - * drop frames that would otherwise let us drain the - * receive queue. 
- */ - if ((sctp_tsnmap_get_ctsn(map) + 1) != tsn) - return SCTP_IERROR_IGNORE_TSN; - - /* - * We're going to accept the frame but we should renege - * to make space for it. This will send us down that - * path later in this function. - */ - rcvbuf_over = 1; - } - } - /* Process ECN based congestion. * * Since the chunk structure is reused for all chunks within @@ -5377,18 +5333,9 @@ static int sctp_eat_data(const struct sc * seems a bit troublesome in that frag_point varies based on * PMTU. In cases, such as loopback, this might be a rather * large spill over. - * NOTE: If we have a full receive buffer here, we only renege if - * our receiver can still make progress without the tsn being - * received. We do this because in the event that the associations - * receive queue is empty we are filling a leading gap, and since - * reneging moves the gap to the end of the tsn stream, we are likely - * to stall again very shortly. Avoiding the renege when we fill a - * leading gap is a good heuristic for avoiding such steady state - * stalls. - */ - if (!asoc->rwnd || asoc->rwnd_over || - (datalen > asoc->rwnd + asoc->frag_point) || - (rcvbuf_over && (!skb_queue_len(&sk->sk_receive_queue)))) { + */ + if ((!chunk->data_accepted) && (!asoc->rwnd || asoc->rwnd_over || + (datalen > asoc->rwnd + asoc->frag_point))) { /* If this is the next TSN, consider reneging to make * room. Note: Playing nice with a confused sender. A @@ -5409,6 +5356,21 @@ static int sctp_eat_data(const struct sc } /* + * Also try to renege to limit our memory usage in the event that + * we are under memory pressure + * If we can't renege, don't worry about it, the sk_stream_rmem_schedule + * in sctp_ulpevent_make_rcvmsg will drop the frame if we grow our + * memory usage too much + */ + if (*sk->sk_prot_creator->memory_pressure) { + if (sctp_tsnmap_has_gap(map) && + (sctp_tsnmap_get_ctsn(map) + 1) == tsn) { + SCTP_DEBUG_PRINTK("Under Pressure! 
Reneging for tsn:%u\n", tsn); + deliver = SCTP_CMD_RENEGE; + } + } + + /* * Section 3.3.10.9 No User Data (9) * * Cause of error diff -puN net/sctp/socket.c~git-net net/sctp/socket.c --- a/net/sctp/socket.c~git-net +++ a/net/sctp/socket.c @@ -107,23 +107,42 @@ static void sctp_sock_migrate(struct soc struct sctp_association *, sctp_socket_type_t); static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG; +extern struct kmem_cache *sctp_bucket_cachep; +extern int sysctl_sctp_mem[3]; +extern int sysctl_sctp_rmem[3]; +extern int sysctl_sctp_wmem[3]; + +int sctp_memory_pressure; +atomic_t sctp_memory_allocated; +atomic_t sctp_sockets_allocated; + +static void sctp_enter_memory_pressure(void) +{ + sctp_memory_pressure = 1; +} + + /* Get the sndbuf space available at the time on the association. */ static inline int sctp_wspace(struct sctp_association *asoc) { - struct sock *sk = asoc->base.sk; - int amt = 0; + int amt; + + if (asoc->ep->sndbuf_policy) + amt = asoc->sndbuf_used; + else + amt = atomic_read(&asoc->base.sk->sk_wmem_alloc); - if (asoc->ep->sndbuf_policy) { - /* make sure that no association uses more than sk_sndbuf */ - amt = sk->sk_sndbuf - asoc->sndbuf_used; + if (amt >= asoc->base.sk->sk_sndbuf) { + if (asoc->base.sk->sk_userlocks & SOCK_SNDBUF_LOCK) + amt = 0; + else { + amt = sk_stream_wspace(asoc->base.sk); + if (amt < 0) + amt = 0; + } } else { - /* do socket level accounting */ - amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc); + amt = asoc->base.sk->sk_sndbuf - amt; } - - if (amt < 0) - amt = 0; - return amt; } @@ -155,6 +174,7 @@ static inline void sctp_set_owner_w(stru sizeof(struct sctp_chunk); atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); + sk_charge_skb(sk, chunk->skb); } /* Verify that this is a valid address. 
*/ @@ -3315,6 +3335,7 @@ SCTP_STATIC int sctp_init_sock(struct so sp->hmac = NULL; SCTP_DBG_OBJCNT_INC(sock); + atomic_inc(&sctp_sockets_allocated); return 0; } @@ -3328,7 +3349,7 @@ SCTP_STATIC int sctp_destroy_sock(struct /* Release our hold on the endpoint. */ ep = sctp_sk(sk)->ep; sctp_endpoint_free(ep); - + atomic_dec(&sctp_sockets_allocated); return 0; } @@ -5750,6 +5771,12 @@ static void sctp_wfree(struct sk_buff *s atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc); + /* + * This undoes what is done via sk_charge_skb + */ + sk->sk_wmem_queued -= skb->truesize; + sk->sk_forward_alloc += skb->truesize; + sock_wfree(skb); __sctp_write_space(asoc); @@ -5767,6 +5794,11 @@ void sctp_sock_rfree(struct sk_buff *skb struct sctp_ulpevent *event = sctp_skb2event(skb); atomic_sub(event->rmem_len, &sk->sk_rmem_alloc); + + /* + * Mimic the behavior of sk_stream_rfree + */ + sk->sk_forward_alloc += event->rmem_len; } @@ -6156,6 +6188,7 @@ static void sctp_sock_migrate(struct soc sctp_release_sock(newsk); } + /* This proto struct describes the ULP interface for SCTP. 
*/ struct proto sctp_prot = { .name = "SCTP", @@ -6178,6 +6211,12 @@ struct proto sctp_prot = { .unhash = sctp_unhash, .get_port = sctp_get_port, .obj_size = sizeof(struct sctp_sock), + .sysctl_mem = sysctl_sctp_mem, + .sysctl_rmem = sysctl_sctp_rmem, + .sysctl_wmem = sysctl_sctp_wmem, + .memory_pressure = &sctp_memory_pressure, + .enter_memory_pressure = sctp_enter_memory_pressure, + .memory_allocated = &sctp_memory_allocated, }; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -6202,5 +6241,11 @@ struct proto sctpv6_prot = { .unhash = sctp_unhash, .get_port = sctp_get_port, .obj_size = sizeof(struct sctp6_sock), + .sysctl_mem = sysctl_sctp_mem, + .sysctl_rmem = sysctl_sctp_rmem, + .sysctl_wmem = sysctl_sctp_wmem, + .memory_pressure = &sctp_memory_pressure, + .enter_memory_pressure = sctp_enter_memory_pressure, + .memory_allocated = &sctp_memory_allocated, }; #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ diff -puN net/sctp/sysctl.c~git-net net/sctp/sysctl.c --- a/net/sctp/sysctl.c~git-net +++ a/net/sctp/sysctl.c @@ -52,6 +52,15 @@ static int int_max = INT_MAX; static long sack_timer_min = 1; static long sack_timer_max = 500; +int sysctl_sctp_mem[3]; +int sysctl_sctp_rmem[3]; +int sysctl_sctp_wmem[3]; + +/* + * per assoc memory limitation for sends + */ +int sysctl_sctp_wmem[3]; + static ctl_table sctp_table[] = { { .ctl_name = NET_SCTP_RTO_INITIAL, @@ -226,6 +235,30 @@ static ctl_table sctp_table[] = { .extra1 = &sack_timer_min, .extra2 = &sack_timer_max, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sctp_mem", + .data = &sysctl_sctp_mem, + .maxlen = sizeof(sysctl_sctp_mem), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sctp_rmem", + .data = &sysctl_sctp_rmem, + .maxlen = sizeof(sysctl_sctp_rmem), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sctp_wmem", + .data = &sysctl_sctp_wmem, + .maxlen =
sizeof(sysctl_sctp_wmem), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = 0 } }; diff -puN net/sctp/ulpevent.c~git-net net/sctp/ulpevent.c --- a/net/sctp/ulpevent.c~git-net +++ a/net/sctp/ulpevent.c @@ -685,6 +685,24 @@ struct sctp_ulpevent *sctp_ulpevent_make struct sctp_ulpevent *event = NULL; struct sk_buff *skb; size_t padding, len; + int rx_count; + + /* + * check to see if we need to make space for this + * new skb, expand the rcvbuffer if needed, or drop + * the frame + */ + if (asoc->ep->rcvbuf_policy) + rx_count = atomic_read(&asoc->rmem_alloc); + else + rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc); + + if (rx_count >= asoc->base.sk->sk_rcvbuf) { + + if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) || + (!sk_stream_rmem_schedule(asoc->base.sk, chunk->skb))) + goto fail; + } /* Clone the original skb, sharing the data. */ skb = skb_clone(chunk->skb, gfp); diff -puN net/sctp/ulpqueue.c~git-net net/sctp/ulpqueue.c --- a/net/sctp/ulpqueue.c~git-net +++ a/net/sctp/ulpqueue.c @@ -1027,6 +1027,7 @@ void sctp_ulpq_renege(struct sctp_ulpq * sctp_ulpq_partial_delivery(ulpq, chunk, gfp); } + sk_stream_mem_reclaim(asoc->base.sk); return; } diff -puN net/sunrpc/svcsock.c~git-net net/sunrpc/svcsock.c --- a/net/sunrpc/svcsock.c~git-net +++ a/net/sunrpc/svcsock.c @@ -19,6 +19,7 @@ * Copyright (C) 1995, 1996 Olaf Kirch */ +#include #include #include #include @@ -877,7 +878,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) } else { rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; rqstp->rq_respages = rqstp->rq_pages + 1 + - (rqstp->rq_arg.page_len + PAGE_SIZE - 1)/ PAGE_SIZE; + DIV_ROUND_UP(rqstp->rq_arg.page_len, PAGE_SIZE); } if (serv->sv_stats) diff -puN net/wireless/radiotap.c~git-net net/wireless/radiotap.c --- a/net/wireless/radiotap.c~git-net +++ a/net/wireless/radiotap.c @@ -161,7 +161,11 @@ int ieee80211_radiotap_iterator_next( [IEEE80211_RADIOTAP_DBM_TX_POWER] = 0x11, [IEEE80211_RADIOTAP_ANTENNA] = 0x11, 
[IEEE80211_RADIOTAP_DB_ANTSIGNAL] = 0x11, - [IEEE80211_RADIOTAP_DB_ANTNOISE] = 0x11 + [IEEE80211_RADIOTAP_DB_ANTNOISE] = 0x11, + [IEEE80211_RADIOTAP_RX_FLAGS] = 0x22, + [IEEE80211_RADIOTAP_TX_FLAGS] = 0x22, + [IEEE80211_RADIOTAP_RTS_RETRIES] = 0x11, + [IEEE80211_RADIOTAP_DATA_RETRIES] = 0x11, /* * add more here as they are defined in * include/net/ieee80211_radiotap.h diff -puN net/wireless/wext.c~git-net net/wireless/wext.c --- a/net/wireless/wext.c~git-net +++ a/net/wireless/wext.c @@ -1129,10 +1129,12 @@ static int rtnetlink_fill_iwinfo(struct { struct ifinfomsg *r; struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); - nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(*r)); - r = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, 0, 0, type, sizeof(*r), 0); + if (nlh == NULL) + return -EMSGSIZE; + + r = nlmsg_data(nlh); r->ifi_family = AF_UNSPEC; r->__ifi_pad = 0; r->ifi_type = dev->type; @@ -1141,15 +1143,13 @@ static int rtnetlink_fill_iwinfo(struct r->ifi_change = 0; /* Wireless changes don't affect those flags */ /* Add the wireless events in the netlink packet */ - RTA_PUT(skb, IFLA_WIRELESS, event_len, event); + NLA_PUT(skb, IFLA_WIRELESS, event_len, event); - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; + return nlmsg_end(skb, nlh); -nlmsg_failure: -rtattr_failure: - nlmsg_trim(skb, b); - return -1; +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } /* ---------------------------------------------------------------- */ @@ -1162,17 +1162,19 @@ rtattr_failure: static void rtmsg_iwinfo(struct net_device *dev, char *event, int event_len) { struct sk_buff *skb; - int size = NLMSG_GOODSIZE; + int err; - skb = alloc_skb(size, GFP_ATOMIC); + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) return; - if (rtnetlink_fill_iwinfo(skb, dev, RTM_NEWLINK, - event, event_len) < 0) { + err = rtnetlink_fill_iwinfo(skb, dev, RTM_NEWLINK, event, event_len); + if (err < 0) { + WARN_ON(err == -EMSGSIZE); kfree_skb(skb); return; } + 
NETLINK_CB(skb).dst_group = RTNLGRP_LINK; skb_queue_tail(&wireless_nlevent_queue, skb); tasklet_schedule(&wireless_nlevent_tasklet); diff -puN net/xfrm/xfrm_policy.c~git-net net/xfrm/xfrm_policy.c --- a/net/xfrm/xfrm_policy.c~git-net +++ a/net/xfrm/xfrm_policy.c @@ -1477,7 +1477,7 @@ restart: pol_dead = 0; xfrm_nr = 0; - if (sk && sk->sk_policy[1]) { + if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); if (IS_ERR(policy)) return PTR_ERR(policy); diff -puN net/xfrm/xfrm_user.c~git-net net/xfrm/xfrm_user.c --- a/net/xfrm/xfrm_user.c~git-net +++ a/net/xfrm/xfrm_user.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -33,23 +32,21 @@ #endif #include -static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type) +static inline int alg_len(struct xfrm_algo *alg) { - struct rtattr *rt = xfrma[type - 1]; + return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); +} + +static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) +{ + struct nlattr *rt = attrs[type]; struct xfrm_algo *algp; - int len; if (!rt) return 0; - len = (rt->rta_len - sizeof(*rt)) - sizeof(*algp); - if (len < 0) - return -EINVAL; - - algp = RTA_DATA(rt); - - len -= (algp->alg_key_len + 7U) / 8; - if (len < 0) + algp = nla_data(rt); + if (nla_len(rt) < alg_len(algp)) return -EINVAL; switch (type) { @@ -77,55 +74,25 @@ static int verify_one_alg(struct rtattr return 0; } -static int verify_encap_tmpl(struct rtattr **xfrma) -{ - struct rtattr *rt = xfrma[XFRMA_ENCAP - 1]; - struct xfrm_encap_tmpl *encap; - - if (!rt) - return 0; - - if ((rt->rta_len - sizeof(*rt)) < sizeof(*encap)) - return -EINVAL; - - return 0; -} - -static int verify_one_addr(struct rtattr **xfrma, enum xfrm_attr_type_t type, +static void verify_one_addr(struct nlattr **attrs, enum xfrm_attr_type_t type, xfrm_address_t **addrp) { - struct rtattr *rt = xfrma[type - 1]; + struct nlattr *rt = attrs[type]; - if (!rt) - return 0; 
- - if ((rt->rta_len - sizeof(*rt)) < sizeof(**addrp)) - return -EINVAL; - - if (addrp) - *addrp = RTA_DATA(rt); - - return 0; + if (rt && addrp) + *addrp = nla_data(rt); } -static inline int verify_sec_ctx_len(struct rtattr **xfrma) +static inline int verify_sec_ctx_len(struct nlattr **attrs) { - struct rtattr *rt = xfrma[XFRMA_SEC_CTX - 1]; + struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_user_sec_ctx *uctx; - int len = 0; if (!rt) return 0; - if (rt->rta_len < sizeof(*uctx)) - return -EINVAL; - - uctx = RTA_DATA(rt); - - len += sizeof(struct xfrm_user_sec_ctx); - len += uctx->ctx_len; - - if (uctx->len != len) + uctx = nla_data(rt); + if (uctx->len != (sizeof(struct xfrm_user_sec_ctx) + uctx->ctx_len)) return -EINVAL; return 0; @@ -133,7 +100,7 @@ static inline int verify_sec_ctx_len(str static int verify_newsa_info(struct xfrm_usersa_info *p, - struct rtattr **xfrma) + struct nlattr **attrs) { int err; @@ -157,35 +124,35 @@ static int verify_newsa_info(struct xfrm err = -EINVAL; switch (p->id.proto) { case IPPROTO_AH: - if (!xfrma[XFRMA_ALG_AUTH-1] || - xfrma[XFRMA_ALG_CRYPT-1] || - xfrma[XFRMA_ALG_COMP-1]) + if (!attrs[XFRMA_ALG_AUTH] || + attrs[XFRMA_ALG_CRYPT] || + attrs[XFRMA_ALG_COMP]) goto out; break; case IPPROTO_ESP: - if ((!xfrma[XFRMA_ALG_AUTH-1] && - !xfrma[XFRMA_ALG_CRYPT-1]) || - xfrma[XFRMA_ALG_COMP-1]) + if ((!attrs[XFRMA_ALG_AUTH] && + !attrs[XFRMA_ALG_CRYPT]) || + attrs[XFRMA_ALG_COMP]) goto out; break; case IPPROTO_COMP: - if (!xfrma[XFRMA_ALG_COMP-1] || - xfrma[XFRMA_ALG_AUTH-1] || - xfrma[XFRMA_ALG_CRYPT-1]) + if (!attrs[XFRMA_ALG_COMP] || + attrs[XFRMA_ALG_AUTH] || + attrs[XFRMA_ALG_CRYPT]) goto out; break; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case IPPROTO_DSTOPTS: case IPPROTO_ROUTING: - if (xfrma[XFRMA_ALG_COMP-1] || - xfrma[XFRMA_ALG_AUTH-1] || - xfrma[XFRMA_ALG_CRYPT-1] || - xfrma[XFRMA_ENCAP-1] || - xfrma[XFRMA_SEC_CTX-1] || - !xfrma[XFRMA_COADDR-1]) + if (attrs[XFRMA_ALG_COMP] || + attrs[XFRMA_ALG_AUTH] || + 
attrs[XFRMA_ALG_CRYPT] || + attrs[XFRMA_ENCAP] || + attrs[XFRMA_SEC_CTX] || + !attrs[XFRMA_COADDR]) goto out; break; #endif @@ -194,17 +161,13 @@ static int verify_newsa_info(struct xfrm goto out; } - if ((err = verify_one_alg(xfrma, XFRMA_ALG_AUTH))) - goto out; - if ((err = verify_one_alg(xfrma, XFRMA_ALG_CRYPT))) + if ((err = verify_one_alg(attrs, XFRMA_ALG_AUTH))) goto out; - if ((err = verify_one_alg(xfrma, XFRMA_ALG_COMP))) + if ((err = verify_one_alg(attrs, XFRMA_ALG_CRYPT))) goto out; - if ((err = verify_encap_tmpl(xfrma))) + if ((err = verify_one_alg(attrs, XFRMA_ALG_COMP))) goto out; - if ((err = verify_sec_ctx_len(xfrma))) - goto out; - if ((err = verify_one_addr(xfrma, XFRMA_COADDR, NULL))) + if ((err = verify_sec_ctx_len(attrs))) goto out; err = -EINVAL; @@ -227,25 +190,22 @@ out: static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, struct xfrm_algo_desc *(*get_byname)(char *, int), - struct rtattr *u_arg) + struct nlattr *rta) { - struct rtattr *rta = u_arg; struct xfrm_algo *p, *ualg; struct xfrm_algo_desc *algo; - int len; if (!rta) return 0; - ualg = RTA_DATA(rta); + ualg = nla_data(rta); algo = get_byname(ualg->alg_name, 1); if (!algo) return -ENOSYS; *props = algo->desc.sadb_alg_id; - len = sizeof(*ualg) + (ualg->alg_key_len + 7U) / 8; - p = kmemdup(ualg, len, GFP_KERNEL); + p = kmemdup(ualg, alg_len(ualg), GFP_KERNEL); if (!p) return -ENOMEM; @@ -254,24 +214,6 @@ static int attach_one_algo(struct xfrm_a return 0; } -static int attach_encap_tmpl(struct xfrm_encap_tmpl **encapp, struct rtattr *u_arg) -{ - struct rtattr *rta = u_arg; - struct xfrm_encap_tmpl *p, *uencap; - - if (!rta) - return 0; - - uencap = RTA_DATA(rta); - p = kmemdup(uencap, sizeof(*p), GFP_KERNEL); - if (!p) - return -ENOMEM; - - *encapp = p; - return 0; -} - - static inline int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx) { int len = 0; @@ -283,34 +225,6 @@ static inline int xfrm_user_sec_ctx_size return len; } -static int attach_sec_ctx(struct xfrm_state 
*x, struct rtattr *u_arg) -{ - struct xfrm_user_sec_ctx *uctx; - - if (!u_arg) - return 0; - - uctx = RTA_DATA(u_arg); - return security_xfrm_state_alloc(x, uctx); -} - -static int attach_one_addr(xfrm_address_t **addrpp, struct rtattr *u_arg) -{ - struct rtattr *rta = u_arg; - xfrm_address_t *p, *uaddrp; - - if (!rta) - return 0; - - uaddrp = RTA_DATA(rta); - p = kmemdup(uaddrp, sizeof(*p), GFP_KERNEL); - if (!p) - return -ENOMEM; - - *addrpp = p; - return 0; -} - static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) { memcpy(&x->id, &p->id, sizeof(x->id)); @@ -336,53 +250,30 @@ static void copy_from_user_state(struct * somehow made shareable and move it to xfrm_state.c - JHS * */ -static int xfrm_update_ae_params(struct xfrm_state *x, struct rtattr **xfrma) +static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs) { - int err = - EINVAL; - struct rtattr *rp = xfrma[XFRMA_REPLAY_VAL-1]; - struct rtattr *lt = xfrma[XFRMA_LTIME_VAL-1]; - struct rtattr *et = xfrma[XFRMA_ETIMER_THRESH-1]; - struct rtattr *rt = xfrma[XFRMA_REPLAY_THRESH-1]; + struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; + struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; + struct nlattr *et = attrs[XFRMA_ETIMER_THRESH]; + struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH]; if (rp) { - struct xfrm_replay_state *replay; - if (RTA_PAYLOAD(rp) < sizeof(*replay)) - goto error; - replay = RTA_DATA(rp); - memcpy(&x->replay, replay, sizeof(*replay)); - memcpy(&x->preplay, replay, sizeof(*replay)); + nla_memcpy(&x->replay, rp, sizeof(x->replay)); + nla_memcpy(&x->preplay, rp, sizeof(x->preplay)); } - if (lt) { - struct xfrm_lifetime_cur *ltime; - if (RTA_PAYLOAD(lt) < sizeof(*ltime)) - goto error; - ltime = RTA_DATA(lt); - x->curlft.bytes = ltime->bytes; - x->curlft.packets = ltime->packets; - x->curlft.add_time = ltime->add_time; - x->curlft.use_time = ltime->use_time; - } + if (lt) + nla_memcpy(&x->curlft, lt, sizeof(x->curlft)); - if (et) { - if (RTA_PAYLOAD(et) < 
sizeof(u32)) - goto error; - x->replay_maxage = *(u32*)RTA_DATA(et); - } + if (et) + x->replay_maxage = nla_get_u32(et); - if (rt) { - if (RTA_PAYLOAD(rt) < sizeof(u32)) - goto error; - x->replay_maxdiff = *(u32*)RTA_DATA(rt); - } - - return 0; -error: - return err; + if (rt) + x->replay_maxdiff = nla_get_u32(rt); } static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, - struct rtattr **xfrma, + struct nlattr **attrs, int *errp) { struct xfrm_state *x = xfrm_state_alloc(); @@ -395,25 +286,37 @@ static struct xfrm_state *xfrm_state_con if ((err = attach_one_algo(&x->aalg, &x->props.aalgo, xfrm_aalg_get_byname, - xfrma[XFRMA_ALG_AUTH-1]))) + attrs[XFRMA_ALG_AUTH]))) goto error; if ((err = attach_one_algo(&x->ealg, &x->props.ealgo, xfrm_ealg_get_byname, - xfrma[XFRMA_ALG_CRYPT-1]))) + attrs[XFRMA_ALG_CRYPT]))) goto error; if ((err = attach_one_algo(&x->calg, &x->props.calgo, xfrm_calg_get_byname, - xfrma[XFRMA_ALG_COMP-1]))) - goto error; - if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1]))) - goto error; - if ((err = attach_one_addr(&x->coaddr, xfrma[XFRMA_COADDR-1]))) + attrs[XFRMA_ALG_COMP]))) goto error; + + if (attrs[XFRMA_ENCAP]) { + x->encap = kmemdup(nla_data(attrs[XFRMA_ENCAP]), + sizeof(*x->encap), GFP_KERNEL); + if (x->encap == NULL) + goto error; + } + + if (attrs[XFRMA_COADDR]) { + x->coaddr = kmemdup(nla_data(attrs[XFRMA_COADDR]), + sizeof(*x->coaddr), GFP_KERNEL); + if (x->coaddr == NULL) + goto error; + } + err = xfrm_init_state(x); if (err) goto error; - if ((err = attach_sec_ctx(x, xfrma[XFRMA_SEC_CTX-1]))) + if (attrs[XFRMA_SEC_CTX] && + security_xfrm_state_alloc(x, nla_data(attrs[XFRMA_SEC_CTX]))) goto error; x->km.seq = p->seq; @@ -426,9 +329,7 @@ static struct xfrm_state *xfrm_state_con /* override default values from above */ - err = xfrm_update_ae_params(x, (struct rtattr **)xfrma); - if (err < 0) - goto error; + xfrm_update_ae_params(x, attrs); return x; @@ -441,18 +342,18 @@ error_no_put: } static int 
xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { - struct xfrm_usersa_info *p = NLMSG_DATA(nlh); + struct xfrm_usersa_info *p = nlmsg_data(nlh); struct xfrm_state *x; int err; struct km_event c; - err = verify_newsa_info(p, xfrma); + err = verify_newsa_info(p, attrs); if (err) return err; - x = xfrm_state_construct(p, xfrma, &err); + x = xfrm_state_construct(p, attrs, &err); if (!x) return err; @@ -482,7 +383,7 @@ out: } static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, - struct rtattr **xfrma, + struct nlattr **attrs, int *errp) { struct xfrm_state *x = NULL; @@ -494,10 +395,7 @@ static struct xfrm_state *xfrm_user_stat } else { xfrm_address_t *saddr = NULL; - err = verify_one_addr(xfrma, XFRMA_SRCADDR, &saddr); - if (err) - goto out; - + verify_one_addr(attrs, XFRMA_SRCADDR, &saddr); if (!saddr) { err = -EINVAL; goto out; @@ -515,14 +413,14 @@ static struct xfrm_state *xfrm_user_stat } static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct xfrm_state *x; int err = -ESRCH; struct km_event c; - struct xfrm_usersa_id *p = NLMSG_DATA(nlh); + struct xfrm_usersa_id *p = nlmsg_data(nlh); - x = xfrm_user_state_lookup(p, xfrma, &err); + x = xfrm_user_state_lookup(p, attrs, &err); if (x == NULL) return err; @@ -576,6 +474,27 @@ struct xfrm_dump_info { int this_idx; }; +static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb) +{ + int ctx_size = sizeof(struct xfrm_sec_ctx) + s->ctx_len; + struct xfrm_user_sec_ctx *uctx; + struct nlattr *attr; + + attr = nla_reserve(skb, XFRMA_SEC_CTX, ctx_size); + if (attr == NULL) + return -EMSGSIZE; + + uctx = nla_data(attr); + uctx->exttype = XFRMA_SEC_CTX; + uctx->len = ctx_size; + uctx->ctx_doi = s->ctx_doi; + uctx->ctx_alg = s->ctx_alg; + uctx->ctx_len = s->ctx_len; + memcpy(uctx + 1, s->ctx_str, s->ctx_len); + + return 0; +} + static int dump_one_state(struct xfrm_state 
*x, int count, void *ptr) { struct xfrm_dump_info *sp = ptr; @@ -583,60 +502,45 @@ static int dump_one_state(struct xfrm_st struct sk_buff *skb = sp->out_skb; struct xfrm_usersa_info *p; struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); if (sp->this_idx < sp->start_idx) goto out; - nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, - sp->nlmsg_seq, - XFRM_MSG_NEWSA, sizeof(*p)); - nlh->nlmsg_flags = sp->nlmsg_flags; + nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, + XFRM_MSG_NEWSA, sizeof(*p), sp->nlmsg_flags); + if (nlh == NULL) + return -EMSGSIZE; - p = NLMSG_DATA(nlh); + p = nlmsg_data(nlh); copy_to_user_state(x, p); if (x->aalg) - RTA_PUT(skb, XFRMA_ALG_AUTH, - sizeof(*(x->aalg))+(x->aalg->alg_key_len+7)/8, x->aalg); + NLA_PUT(skb, XFRMA_ALG_AUTH, alg_len(x->aalg), x->aalg); if (x->ealg) - RTA_PUT(skb, XFRMA_ALG_CRYPT, - sizeof(*(x->ealg))+(x->ealg->alg_key_len+7)/8, x->ealg); + NLA_PUT(skb, XFRMA_ALG_CRYPT, alg_len(x->ealg), x->ealg); if (x->calg) - RTA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); + NLA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); if (x->encap) - RTA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); + NLA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); - if (x->security) { - int ctx_size = sizeof(struct xfrm_sec_ctx) + - x->security->ctx_len; - struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size); - struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); - - uctx->exttype = XFRMA_SEC_CTX; - uctx->len = ctx_size; - uctx->ctx_doi = x->security->ctx_doi; - uctx->ctx_alg = x->security->ctx_alg; - uctx->ctx_len = x->security->ctx_len; - memcpy(uctx + 1, x->security->ctx_str, x->security->ctx_len); - } + if (x->security && copy_sec_ctx(x->security, skb) < 0) + goto nla_put_failure; if (x->coaddr) - RTA_PUT(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); + NLA_PUT(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); if (x->lastused) - RTA_PUT(skb, XFRMA_LASTUSED, sizeof(x->lastused), &x->lastused); + 
NLA_PUT_U64(skb, XFRMA_LASTUSED, x->lastused); - nlh->nlmsg_len = skb_tail_pointer(skb) - b; + nlmsg_end(skb, nlh); out: sp->this_idx++; return 0; -nlmsg_failure: -rtattr_failure: - nlmsg_trim(skb, b); - return -1; +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) @@ -661,7 +565,7 @@ static struct sk_buff *xfrm_state_netlin struct xfrm_dump_info info; struct sk_buff *skb; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) return ERR_PTR(-ENOMEM); @@ -679,6 +583,13 @@ static struct sk_buff *xfrm_state_netlin return skb; } +static inline size_t xfrm_spdinfo_msgsize(void) +{ + return NLMSG_ALIGN(4) + + nla_total_size(sizeof(struct xfrmu_spdinfo)) + + nla_total_size(sizeof(struct xfrmu_spdhinfo)); +} + static int build_spdinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) { struct xfrmk_spdinfo si; @@ -714,18 +625,14 @@ nla_put_failure: } static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct sk_buff *r_skb; - u32 *flags = NLMSG_DATA(nlh); + u32 *flags = nlmsg_data(nlh); u32 spid = NETLINK_CB(skb).pid; u32 seq = nlh->nlmsg_seq; - int len = NLMSG_LENGTH(sizeof(u32)); - len += RTA_SPACE(sizeof(struct xfrmu_spdinfo)); - len += RTA_SPACE(sizeof(struct xfrmu_spdhinfo)); - - r_skb = alloc_skb(len, GFP_ATOMIC); + r_skb = nlmsg_new(xfrm_spdinfo_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; @@ -735,6 +642,13 @@ static int xfrm_get_spdinfo(struct sk_bu return nlmsg_unicast(xfrm_nl, r_skb, spid); } +static inline size_t xfrm_sadinfo_msgsize(void) +{ + return NLMSG_ALIGN(4) + + nla_total_size(sizeof(struct xfrmu_sadhinfo)) + + nla_total_size(4); /* XFRMA_SAD_CNT */ +} + static int build_sadinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) { struct xfrmk_sadinfo si; @@ -764,19 +678,14 @@ nla_put_failure: } static int xfrm_get_sadinfo(struct 
sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct sk_buff *r_skb; - u32 *flags = NLMSG_DATA(nlh); + u32 *flags = nlmsg_data(nlh); u32 spid = NETLINK_CB(skb).pid; u32 seq = nlh->nlmsg_seq; - int len = NLMSG_LENGTH(sizeof(u32)); - - len += RTA_SPACE(sizeof(struct xfrmu_sadhinfo)); - len += RTA_SPACE(sizeof(u32)); - - r_skb = alloc_skb(len, GFP_ATOMIC); + r_skb = nlmsg_new(xfrm_sadinfo_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; @@ -787,14 +696,14 @@ static int xfrm_get_sadinfo(struct sk_bu } static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { - struct xfrm_usersa_id *p = NLMSG_DATA(nlh); + struct xfrm_usersa_id *p = nlmsg_data(nlh); struct xfrm_state *x; struct sk_buff *resp_skb; int err = -ESRCH; - x = xfrm_user_state_lookup(p, xfrma, &err); + x = xfrm_user_state_lookup(p, attrs, &err); if (x == NULL) goto out_noput; @@ -802,8 +711,7 @@ static int xfrm_get_sa(struct sk_buff *s if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = netlink_unicast(xfrm_nl, resp_skb, - NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = nlmsg_unicast(xfrm_nl, resp_skb, NETLINK_CB(skb).pid); } xfrm_state_put(x); out_noput: @@ -834,7 +742,7 @@ static int verify_userspi_info(struct xf } static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct xfrm_state *x; struct xfrm_userspi_info *p; @@ -843,7 +751,7 @@ static int xfrm_alloc_userspi(struct sk_ int family; int err; - p = NLMSG_DATA(nlh); + p = nlmsg_data(nlh); err = verify_userspi_info(p); if (err) goto out_noput; @@ -884,8 +792,7 @@ static int xfrm_alloc_userspi(struct sk_ goto out; } - err = netlink_unicast(xfrm_nl, resp_skb, - NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = nlmsg_unicast(xfrm_nl, resp_skb, NETLINK_CB(skb).pid); out: xfrm_state_put(x); @@ -964,15 +871,15 @@ static int verify_newpolicy_info(struct return 
verify_policy_dir(p->dir); } -static int copy_from_user_sec_ctx(struct xfrm_policy *pol, struct rtattr **xfrma) +static int copy_from_user_sec_ctx(struct xfrm_policy *pol, struct nlattr **attrs) { - struct rtattr *rt = xfrma[XFRMA_SEC_CTX-1]; + struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_user_sec_ctx *uctx; if (!rt) return 0; - uctx = RTA_DATA(rt); + uctx = nla_data(rt); return security_xfrm_policy_alloc(pol, uctx); } @@ -1032,38 +939,35 @@ static int validate_tmpl(int nr, struct return 0; } -static int copy_from_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma) +static int copy_from_user_tmpl(struct xfrm_policy *pol, struct nlattr **attrs) { - struct rtattr *rt = xfrma[XFRMA_TMPL-1]; + struct nlattr *rt = attrs[XFRMA_TMPL]; if (!rt) { pol->xfrm_nr = 0; } else { - struct xfrm_user_tmpl *utmpl = RTA_DATA(rt); - int nr = (rt->rta_len - sizeof(*rt)) / sizeof(*utmpl); + struct xfrm_user_tmpl *utmpl = nla_data(rt); + int nr = nla_len(rt) / sizeof(*utmpl); int err; err = validate_tmpl(nr, utmpl, pol->family); if (err) return err; - copy_templates(pol, RTA_DATA(rt), nr); + copy_templates(pol, utmpl, nr); } return 0; } -static int copy_from_user_policy_type(u8 *tp, struct rtattr **xfrma) +static int copy_from_user_policy_type(u8 *tp, struct nlattr **attrs) { - struct rtattr *rt = xfrma[XFRMA_POLICY_TYPE-1]; + struct nlattr *rt = attrs[XFRMA_POLICY_TYPE]; struct xfrm_userpolicy_type *upt; u8 type = XFRM_POLICY_TYPE_MAIN; int err; if (rt) { - if (rt->rta_len < sizeof(*upt)) - return -EINVAL; - - upt = RTA_DATA(rt); + upt = nla_data(rt); type = upt->type; } @@ -1101,7 +1005,7 @@ static void copy_to_user_policy(struct x p->share = XFRM_SHARE_ANY; /* XXX xp->share */ } -static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, struct rtattr **xfrma, int *errp) +static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, struct nlattr **attrs, int *errp) { struct xfrm_policy *xp = xfrm_policy_alloc(GFP_KERNEL); int 
err; @@ -1113,12 +1017,12 @@ static struct xfrm_policy *xfrm_policy_c copy_from_user_policy(xp, p); - err = copy_from_user_policy_type(&xp->type, xfrma); + err = copy_from_user_policy_type(&xp->type, attrs); if (err) goto error; - if (!(err = copy_from_user_tmpl(xp, xfrma))) - err = copy_from_user_sec_ctx(xp, xfrma); + if (!(err = copy_from_user_tmpl(xp, attrs))) + err = copy_from_user_sec_ctx(xp, attrs); if (err) goto error; @@ -1130,9 +1034,9 @@ static struct xfrm_policy *xfrm_policy_c } static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { - struct xfrm_userpolicy_info *p = NLMSG_DATA(nlh); + struct xfrm_userpolicy_info *p = nlmsg_data(nlh); struct xfrm_policy *xp; struct km_event c; int err; @@ -1141,11 +1045,11 @@ static int xfrm_add_policy(struct sk_buf err = verify_newpolicy_info(p); if (err) return err; - err = verify_sec_ctx_len(xfrma); + err = verify_sec_ctx_len(attrs); if (err) return err; - xp = xfrm_policy_construct(p, xfrma, &err); + xp = xfrm_policy_construct(p, attrs, &err); if (!xp) return err; @@ -1197,32 +1101,9 @@ static int copy_to_user_tmpl(struct xfrm up->ealgos = kp->ealgos; up->calgos = kp->calgos; } - RTA_PUT(skb, XFRMA_TMPL, - (sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr), - vec); - return 0; - -rtattr_failure: - return -1; -} - -static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb) -{ - int ctx_size = sizeof(struct xfrm_sec_ctx) + s->ctx_len; - struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size); - struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); - - uctx->exttype = XFRMA_SEC_CTX; - uctx->len = ctx_size; - uctx->ctx_doi = s->ctx_doi; - uctx->ctx_alg = s->ctx_alg; - uctx->ctx_len = s->ctx_len; - memcpy(uctx + 1, s->ctx_str, s->ctx_len); - return 0; - - rtattr_failure: - return -1; + return nla_put(skb, XFRMA_TMPL, + sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr, vec); } static inline int copy_to_user_state_sec_ctx(struct xfrm_state *x, struct sk_buff 
*skb) @@ -1240,21 +1121,23 @@ static inline int copy_to_user_sec_ctx(s } return 0; } +static inline size_t userpolicy_type_attrsize(void) +{ +#ifdef CONFIG_XFRM_SUB_POLICY + return nla_total_size(sizeof(struct xfrm_userpolicy_type)); +#else + return 0; +#endif +} #ifdef CONFIG_XFRM_SUB_POLICY static int copy_to_user_policy_type(u8 type, struct sk_buff *skb) { - struct xfrm_userpolicy_type upt; + struct xfrm_userpolicy_type upt = { + .type = type, + }; - memset(&upt, 0, sizeof(upt)); - upt.type = type; - - RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); - - return 0; - -rtattr_failure: - return -1; + return nla_put(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); } #else @@ -1271,17 +1154,16 @@ static int dump_one_policy(struct xfrm_p struct sk_buff *in_skb = sp->in_skb; struct sk_buff *skb = sp->out_skb; struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); if (sp->this_idx < sp->start_idx) goto out; - nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, - sp->nlmsg_seq, - XFRM_MSG_NEWPOLICY, sizeof(*p)); - p = NLMSG_DATA(nlh); - nlh->nlmsg_flags = sp->nlmsg_flags; + nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, + XFRM_MSG_NEWPOLICY, sizeof(*p), sp->nlmsg_flags); + if (nlh == NULL) + return -EMSGSIZE; + p = nlmsg_data(nlh); copy_to_user_policy(xp, p, dir); if (copy_to_user_tmpl(xp, skb) < 0) goto nlmsg_failure; @@ -1290,14 +1172,14 @@ static int dump_one_policy(struct xfrm_p if (copy_to_user_policy_type(xp->type, skb) < 0) goto nlmsg_failure; - nlh->nlmsg_len = skb_tail_pointer(skb) - b; + nlmsg_end(skb, nlh); out: sp->this_idx++; return 0; nlmsg_failure: - nlmsg_trim(skb, b); - return -1; + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) @@ -1326,7 +1208,7 @@ static struct sk_buff *xfrm_policy_netli struct xfrm_dump_info info; struct sk_buff *skb; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return 
ERR_PTR(-ENOMEM); @@ -1345,7 +1227,7 @@ static struct sk_buff *xfrm_policy_netli } static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct xfrm_policy *xp; struct xfrm_userpolicy_id *p; @@ -1354,10 +1236,10 @@ static int xfrm_get_policy(struct sk_buf struct km_event c; int delete; - p = NLMSG_DATA(nlh); + p = nlmsg_data(nlh); delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY; - err = copy_from_user_policy_type(&type, xfrma); + err = copy_from_user_policy_type(&type, attrs); if (err) return err; @@ -1368,16 +1250,16 @@ static int xfrm_get_policy(struct sk_buf if (p->index) xp = xfrm_policy_byid(type, p->dir, p->index, delete, &err); else { - struct rtattr *rt = xfrma[XFRMA_SEC_CTX-1]; + struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_policy tmp; - err = verify_sec_ctx_len(xfrma); + err = verify_sec_ctx_len(attrs); if (err) return err; memset(&tmp, 0, sizeof(struct xfrm_policy)); if (rt) { - struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); + struct xfrm_user_sec_ctx *uctx = nla_data(rt); if ((err = security_xfrm_policy_alloc(&tmp, uctx))) return err; @@ -1396,9 +1278,8 @@ static int xfrm_get_policy(struct sk_buf if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = netlink_unicast(xfrm_nl, resp_skb, - NETLINK_CB(skb).pid, - MSG_DONTWAIT); + err = nlmsg_unicast(xfrm_nl, resp_skb, + NETLINK_CB(skb).pid); } } else { xfrm_audit_log(NETLINK_CB(skb).loginuid, NETLINK_CB(skb).sid, @@ -1420,10 +1301,10 @@ out: } static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct km_event c; - struct xfrm_usersa_flush *p = NLMSG_DATA(nlh); + struct xfrm_usersa_flush *p = nlmsg_data(nlh); struct xfrm_audit audit_info; int err; @@ -1441,18 +1322,25 @@ static int xfrm_flush_sa(struct sk_buff return 0; } +static inline size_t xfrm_aevent_msgsize(void) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_aevent_id)) + + nla_total_size(sizeof(struct 
xfrm_replay_state)) + + nla_total_size(sizeof(struct xfrm_lifetime_cur)) + + nla_total_size(4) /* XFRM_AE_RTHR */ + + nla_total_size(4); /* XFRM_AE_ETHR */ +} static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_event *c) { struct xfrm_aevent_id *id; struct nlmsghdr *nlh; - struct xfrm_lifetime_cur ltime; - unsigned char *b = skb_tail_pointer(skb); - nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_NEWAE, sizeof(*id)); - id = NLMSG_DATA(nlh); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0); + if (nlh == NULL) + return -EMSGSIZE; + id = nlmsg_data(nlh); memcpy(&id->sa_id.daddr, &x->id.daddr,sizeof(x->id.daddr)); id->sa_id.spi = x->id.spi; id->sa_id.family = x->props.family; @@ -1461,54 +1349,34 @@ static int build_aevent(struct sk_buff * id->reqid = x->props.reqid; id->flags = c->data.aevent; - RTA_PUT(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), &x->replay); + NLA_PUT(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), &x->replay); + NLA_PUT(skb, XFRMA_LTIME_VAL, sizeof(x->curlft), &x->curlft); - ltime.bytes = x->curlft.bytes; - ltime.packets = x->curlft.packets; - ltime.add_time = x->curlft.add_time; - ltime.use_time = x->curlft.use_time; + if (id->flags & XFRM_AE_RTHR) + NLA_PUT_U32(skb, XFRMA_REPLAY_THRESH, x->replay_maxdiff); - RTA_PUT(skb, XFRMA_LTIME_VAL, sizeof(struct xfrm_lifetime_cur), &ltime); + if (id->flags & XFRM_AE_ETHR) + NLA_PUT_U32(skb, XFRMA_ETIMER_THRESH, + x->replay_maxage * 10 / HZ); - if (id->flags&XFRM_AE_RTHR) { - RTA_PUT(skb,XFRMA_REPLAY_THRESH,sizeof(u32),&x->replay_maxdiff); - } - - if (id->flags&XFRM_AE_ETHR) { - u32 etimer = x->replay_maxage*10/HZ; - RTA_PUT(skb,XFRMA_ETIMER_THRESH,sizeof(u32),&etimer); - } - - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; + return nlmsg_end(skb, nlh); -rtattr_failure: -nlmsg_failure: - nlmsg_trim(skb, b); - return -1; +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int xfrm_get_ae(struct sk_buff *skb, struct
nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct xfrm_state *x; struct sk_buff *r_skb; int err; struct km_event c; - struct xfrm_aevent_id *p = NLMSG_DATA(nlh); - int len = NLMSG_LENGTH(sizeof(struct xfrm_aevent_id)); + struct xfrm_aevent_id *p = nlmsg_data(nlh); struct xfrm_usersa_id *id = &p->sa_id; - len += RTA_SPACE(sizeof(struct xfrm_replay_state)); - len += RTA_SPACE(sizeof(struct xfrm_lifetime_cur)); - - if (p->flags&XFRM_AE_RTHR) - len+=RTA_SPACE(sizeof(u32)); - - if (p->flags&XFRM_AE_ETHR) - len+=RTA_SPACE(sizeof(u32)); - - r_skb = alloc_skb(len, GFP_ATOMIC); + r_skb = nlmsg_new(xfrm_aevent_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; @@ -1530,22 +1398,21 @@ static int xfrm_get_ae(struct sk_buff *s if (build_aevent(r_skb, x, &c) < 0) BUG(); - err = netlink_unicast(xfrm_nl, r_skb, - NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = nlmsg_unicast(xfrm_nl, r_skb, NETLINK_CB(skb).pid); spin_unlock_bh(&x->lock); xfrm_state_put(x); return err; } static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct xfrm_state *x; struct km_event c; int err = - EINVAL; - struct xfrm_aevent_id *p = NLMSG_DATA(nlh); - struct rtattr *rp = xfrma[XFRMA_REPLAY_VAL-1]; - struct rtattr *lt = xfrma[XFRMA_LTIME_VAL-1]; + struct xfrm_aevent_id *p = nlmsg_data(nlh); + struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; + struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; if (!lt && !rp) return err; @@ -1562,10 +1429,8 @@ static int xfrm_new_ae(struct sk_buff *s goto out; spin_lock_bh(&x->lock); - err = xfrm_update_ae_params(x, xfrma); + xfrm_update_ae_params(x, attrs); spin_unlock_bh(&x->lock); - if (err < 0) - goto out; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; @@ -1579,14 +1444,14 @@ out: } static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct km_event c; u8 type = XFRM_POLICY_TYPE_MAIN; int err; struct xfrm_audit 
audit_info; - err = copy_from_user_policy_type(&type, xfrma); + err = copy_from_user_policy_type(&type, attrs); if (err) return err; @@ -1604,31 +1469,31 @@ static int xfrm_flush_policy(struct sk_b } static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct xfrm_policy *xp; - struct xfrm_user_polexpire *up = NLMSG_DATA(nlh); + struct xfrm_user_polexpire *up = nlmsg_data(nlh); struct xfrm_userpolicy_info *p = &up->pol; u8 type = XFRM_POLICY_TYPE_MAIN; int err = -ENOENT; - err = copy_from_user_policy_type(&type, xfrma); + err = copy_from_user_policy_type(&type, attrs); if (err) return err; if (p->index) xp = xfrm_policy_byid(type, p->dir, p->index, 0, &err); else { - struct rtattr *rt = xfrma[XFRMA_SEC_CTX-1]; + struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_policy tmp; - err = verify_sec_ctx_len(xfrma); + err = verify_sec_ctx_len(attrs); if (err) return err; memset(&tmp, 0, sizeof(struct xfrm_policy)); if (rt) { - struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); + struct xfrm_user_sec_ctx *uctx = nla_data(rt); if ((err = security_xfrm_policy_alloc(&tmp, uctx))) return err; @@ -1665,11 +1530,11 @@ out: } static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct xfrm_state *x; int err; - struct xfrm_user_expire *ue = NLMSG_DATA(nlh); + struct xfrm_user_expire *ue = nlmsg_data(nlh); struct xfrm_usersa_info *p = &ue->state; x = xfrm_state_lookup(&p->id.daddr, p->id.spi, p->id.proto, p->family); @@ -1697,14 +1562,14 @@ out: } static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { struct xfrm_policy *xp; struct xfrm_user_tmpl *ut; int i; - struct rtattr *rt = xfrma[XFRMA_TMPL-1]; + struct nlattr *rt = attrs[XFRMA_TMPL]; - struct xfrm_user_acquire *ua = NLMSG_DATA(nlh); + struct xfrm_user_acquire *ua = nlmsg_data(nlh); struct xfrm_state *x = 
xfrm_state_alloc(); int err = -ENOMEM; @@ -1719,7 +1584,7 @@ static int xfrm_add_acquire(struct sk_bu } /* build an XP */ - xp = xfrm_policy_construct(&ua->policy, (struct rtattr **) xfrma, &err); + xp = xfrm_policy_construct(&ua->policy, attrs, &err); if (!xp) { kfree(x); return err; @@ -1729,7 +1594,7 @@ static int xfrm_add_acquire(struct sk_bu memcpy(&x->props.saddr, &ua->saddr, sizeof(ua->saddr)); memcpy(&x->sel, &ua->sel, sizeof(ua->sel)); - ut = RTA_DATA(rt); + ut = nla_data(rt); /* extract the templates and for each call km_key */ for (i = 0; i < xp->xfrm_nr; i++, ut++) { struct xfrm_tmpl *t = &xp->xfrm_vec[i]; @@ -1751,29 +1616,15 @@ static int xfrm_add_acquire(struct sk_bu } #ifdef CONFIG_XFRM_MIGRATE -static int verify_user_migrate(struct rtattr **xfrma) -{ - struct rtattr *rt = xfrma[XFRMA_MIGRATE-1]; - struct xfrm_user_migrate *um; - - if (!rt) - return -EINVAL; - - if ((rt->rta_len - sizeof(*rt)) < sizeof(*um)) - return -EINVAL; - - return 0; -} - static int copy_from_user_migrate(struct xfrm_migrate *ma, - struct rtattr **xfrma, int *num) + struct nlattr **attrs, int *num) { - struct rtattr *rt = xfrma[XFRMA_MIGRATE-1]; + struct nlattr *rt = attrs[XFRMA_MIGRATE]; struct xfrm_user_migrate *um; int i, num_migrate; - um = RTA_DATA(rt); - num_migrate = (rt->rta_len - sizeof(*rt)) / sizeof(*um); + um = nla_data(rt); + num_migrate = nla_len(rt) / sizeof(*um); if (num_migrate <= 0 || num_migrate > XFRM_MAX_DEPTH) return -EINVAL; @@ -1797,24 +1648,23 @@ static int copy_from_user_migrate(struct } static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { - struct xfrm_userpolicy_id *pi = NLMSG_DATA(nlh); + struct xfrm_userpolicy_id *pi = nlmsg_data(nlh); struct xfrm_migrate m[XFRM_MAX_DEPTH]; u8 type; int err; int n = 0; - err = verify_user_migrate((struct rtattr **)xfrma); - if (err) - return err; + if (attrs[XFRMA_MIGRATE] == NULL) + return -EINVAL; - err = copy_from_user_policy_type(&type, 
(struct rtattr **)xfrma); + err = copy_from_user_policy_type(&type, attrs); if (err) return err; err = copy_from_user_migrate((struct xfrm_migrate *)m, - (struct rtattr **)xfrma, &n); + attrs, &n); if (err) return err; @@ -1827,7 +1677,7 @@ static int xfrm_do_migrate(struct sk_buf } #else static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, - struct rtattr **xfrma) + struct nlattr **attrs) { return -ENOPROTOOPT; } @@ -1849,11 +1699,14 @@ static int copy_to_user_migrate(struct x memcpy(&um.new_daddr, &m->new_daddr, sizeof(um.new_daddr)); memcpy(&um.new_saddr, &m->new_saddr, sizeof(um.new_saddr)); - RTA_PUT(skb, XFRMA_MIGRATE, sizeof(um), &um); - return 0; + return nla_put(skb, XFRMA_MIGRATE, sizeof(um), &um); +} -rtattr_failure: - return -1; +static inline size_t xfrm_migrate_msgsize(int num_migrate) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_id)) + + nla_total_size(sizeof(struct xfrm_user_migrate) * num_migrate) + + userpolicy_type_attrsize(); } static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m, @@ -1863,13 +1716,13 @@ static int build_migrate(struct sk_buff struct xfrm_migrate *mp; struct xfrm_userpolicy_id *pol_id; struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); int i; - nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_MIGRATE, sizeof(*pol_id)); - pol_id = NLMSG_DATA(nlh); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_MIGRATE, sizeof(*pol_id), 0); + if (nlh == NULL) + return -EMSGSIZE; + pol_id = nlmsg_data(nlh); /* copy data from selector, dir, and type to the pol_id */ memset(pol_id, 0, sizeof(*pol_id)); memcpy(&pol_id->sel, sel, sizeof(pol_id->sel)); @@ -1883,25 +1736,18 @@ static int build_migrate(struct sk_buff goto nlmsg_failure; } - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; + return nlmsg_end(skb, nlh); nlmsg_failure: - nlmsg_trim(skb, b); - return -1; + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 
type, struct xfrm_migrate *m, int num_migrate) { struct sk_buff *skb; - size_t len; - len = RTA_SPACE(sizeof(struct xfrm_user_migrate) * num_migrate); - len += NLMSG_SPACE(sizeof(struct xfrm_userpolicy_id)); -#ifdef CONFIG_XFRM_SUB_POLICY - len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type)); -#endif - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; @@ -1909,9 +1755,7 @@ static int xfrm_send_migrate(struct xfrm if (build_migrate(skb, m, num_migrate, sel, dir, type) < 0) BUG(); - NETLINK_CB(skb).dst_group = XFRMNLGRP_MIGRATE; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_MIGRATE, - GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC); } #else static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, @@ -1921,7 +1765,7 @@ static int xfrm_send_migrate(struct xfrm } #endif -#define XMSGSIZE(type) NLMSG_LENGTH(sizeof(struct type)) +#define XMSGSIZE(type) sizeof(struct type) static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_info), @@ -1937,19 +1781,36 @@ static const int xfrm_msg_min[XFRM_NR_MS [XFRM_MSG_UPDSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_info), [XFRM_MSG_POLEXPIRE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_polexpire), [XFRM_MSG_FLUSHSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_flush), - [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = NLMSG_LENGTH(0), + [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = 0, [XFRM_MSG_NEWAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), [XFRM_MSG_GETAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), [XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report), [XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id), - [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = NLMSG_LENGTH(sizeof(u32)), - [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = NLMSG_LENGTH(sizeof(u32)), + [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = sizeof(u32), + [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = 
sizeof(u32), }; #undef XMSGSIZE +static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { + [XFRMA_ALG_AUTH] = { .len = sizeof(struct xfrm_algo) }, + [XFRMA_ALG_CRYPT] = { .len = sizeof(struct xfrm_algo) }, + [XFRMA_ALG_COMP] = { .len = sizeof(struct xfrm_algo) }, + [XFRMA_ENCAP] = { .len = sizeof(struct xfrm_encap_tmpl) }, + [XFRMA_TMPL] = { .len = sizeof(struct xfrm_user_tmpl) }, + [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_sec_ctx) }, + [XFRMA_LTIME_VAL] = { .len = sizeof(struct xfrm_lifetime_cur) }, + [XFRMA_REPLAY_VAL] = { .len = sizeof(struct xfrm_replay_state) }, + [XFRMA_REPLAY_THRESH] = { .type = NLA_U32 }, + [XFRMA_ETIMER_THRESH] = { .type = NLA_U32 }, + [XFRMA_SRCADDR] = { .len = sizeof(xfrm_address_t) }, + [XFRMA_COADDR] = { .len = sizeof(xfrm_address_t) }, + [XFRMA_POLICY_TYPE] = { .len = sizeof(struct xfrm_userpolicy_type)}, + [XFRMA_MIGRATE] = { .len = sizeof(struct xfrm_user_migrate) }, +}; + static struct xfrm_link { - int (*doit)(struct sk_buff *, struct nlmsghdr *, struct rtattr **); + int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **); int (*dump)(struct sk_buff *, struct netlink_callback *); } xfrm_dispatch[XFRM_NR_MSGTYPES] = { [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = { .doit = xfrm_add_sa }, @@ -1977,9 +1838,9 @@ static struct xfrm_link { static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { - struct rtattr *xfrma[XFRMA_MAX]; + struct nlattr *attrs[XFRMA_MAX+1]; struct xfrm_link *link; - int type, min_len; + int type, err; type = nlh->nlmsg_type; if (type > XFRM_MSG_MAX) @@ -2001,30 +1862,15 @@ static int xfrm_user_rcv_msg(struct sk_b return netlink_dump_start(xfrm_nl, skb, nlh, link->dump, NULL); } - memset(xfrma, 0, sizeof(xfrma)); - - if (nlh->nlmsg_len < (min_len = xfrm_msg_min[type])) - return -EINVAL; - - if (nlh->nlmsg_len > min_len) { - int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); - struct rtattr *attr = (void *) nlh + NLMSG_ALIGN(min_len); - - while (RTA_OK(attr, attrlen)) { - unsigned 
short flavor = attr->rta_type; - if (flavor) { - if (flavor > XFRMA_MAX) - return -EINVAL; - xfrma[flavor - 1] = attr; - } - attr = RTA_NEXT(attr, attrlen); - } - } + err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, XFRMA_MAX, + xfrma_policy); + if (err < 0) + return err; if (link->doit == NULL) return -EINVAL; - return link->doit(skb, nlh, xfrma); + return link->doit(skb, nlh, attrs); } static void xfrm_netlink_rcv(struct sock *sk, int len) @@ -2039,60 +1885,53 @@ static void xfrm_netlink_rcv(struct sock } while (qlen); } +static inline size_t xfrm_expire_msgsize(void) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)); +} + static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_event *c) { struct xfrm_user_expire *ue; struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); - nlh = NLMSG_PUT(skb, c->pid, 0, XFRM_MSG_EXPIRE, - sizeof(*ue)); - ue = NLMSG_DATA(nlh); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0); + if (nlh == NULL) + return -EMSGSIZE; + ue = nlmsg_data(nlh); copy_to_user_state(x, &ue->state); ue->hard = (c->data.hard != 0) ? 
1 : 0; - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; - -nlmsg_failure: - nlmsg_trim(skb, b); - return -1; + return nlmsg_end(skb, nlh); } static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c) { struct sk_buff *skb; - int len = NLMSG_LENGTH(sizeof(struct xfrm_user_expire)); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(xfrm_expire_msgsize(), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; if (build_expire(skb, x, c) < 0) BUG(); - NETLINK_CB(skb).dst_group = XFRMNLGRP_EXPIRE; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); } static int xfrm_aevent_state_notify(struct xfrm_state *x, struct km_event *c) { struct sk_buff *skb; - int len = NLMSG_LENGTH(sizeof(struct xfrm_aevent_id)); - len += RTA_SPACE(sizeof(struct xfrm_replay_state)); - len += RTA_SPACE(sizeof(struct xfrm_lifetime_cur)); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(xfrm_aevent_msgsize(), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; if (build_aevent(skb, x, c) < 0) BUG(); - NETLINK_CB(skb).dst_group = XFRMNLGRP_AEVENTS; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC); } static int xfrm_notify_sa_flush(struct km_event *c) @@ -2100,42 +1939,37 @@ static int xfrm_notify_sa_flush(struct k struct xfrm_usersa_flush *p; struct nlmsghdr *nlh; struct sk_buff *skb; - sk_buff_data_t b; - int len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_flush)); + int len = NLMSG_ALIGN(sizeof(struct xfrm_usersa_flush)); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(len, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - b = skb->tail; - nlh = NLMSG_PUT(skb, c->pid, c->seq, - XFRM_MSG_FLUSHSA, sizeof(*p)); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHSA, sizeof(*p), 0); + if (nlh == NULL) { + kfree_skb(skb); + return -EMSGSIZE; + } - p = 
NLMSG_DATA(nlh); + p = nlmsg_data(nlh); p->proto = c->data.proto; - nlh->nlmsg_len = skb->tail - b; + nlmsg_end(skb, nlh); - NETLINK_CB(skb).dst_group = XFRMNLGRP_SA; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); - -nlmsg_failure: - kfree_skb(skb); - return -1; + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); } -static inline int xfrm_sa_len(struct xfrm_state *x) +static inline size_t xfrm_sa_len(struct xfrm_state *x) { - int l = 0; + size_t l = 0; if (x->aalg) - l += RTA_SPACE(sizeof(*x->aalg) + (x->aalg->alg_key_len+7)/8); + l += nla_total_size(alg_len(x->aalg)); if (x->ealg) - l += RTA_SPACE(sizeof(*x->ealg) + (x->ealg->alg_key_len+7)/8); + l += nla_total_size(alg_len(x->ealg)); if (x->calg) - l += RTA_SPACE(sizeof(*x->calg)); + l += nla_total_size(sizeof(*x->calg)); if (x->encap) - l += RTA_SPACE(sizeof(*x->encap)); + l += nla_total_size(sizeof(*x->encap)); return l; } @@ -2146,57 +1980,58 @@ static int xfrm_notify_sa(struct xfrm_st struct xfrm_usersa_id *id; struct nlmsghdr *nlh; struct sk_buff *skb; - sk_buff_data_t b; int len = xfrm_sa_len(x); int headlen; headlen = sizeof(*p); if (c->event == XFRM_MSG_DELSA) { - len += RTA_SPACE(headlen); + len += nla_total_size(headlen); headlen = sizeof(*id); } - len += NLMSG_SPACE(headlen); + len += NLMSG_ALIGN(headlen); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(len, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - b = skb->tail; - nlh = NLMSG_PUT(skb, c->pid, c->seq, c->event, headlen); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0); + if (nlh == NULL) + goto nla_put_failure; - p = NLMSG_DATA(nlh); + p = nlmsg_data(nlh); if (c->event == XFRM_MSG_DELSA) { - id = NLMSG_DATA(nlh); + struct nlattr *attr; + + id = nlmsg_data(nlh); memcpy(&id->daddr, &x->id.daddr, sizeof(id->daddr)); id->spi = x->id.spi; id->family = x->props.family; id->proto = x->id.proto; - p = RTA_DATA(__RTA_PUT(skb, XFRMA_SA, sizeof(*p))); + attr = 
nla_reserve(skb, XFRMA_SA, sizeof(*p)); + if (attr == NULL) + goto nla_put_failure; + + p = nla_data(attr); } copy_to_user_state(x, p); if (x->aalg) - RTA_PUT(skb, XFRMA_ALG_AUTH, - sizeof(*(x->aalg))+(x->aalg->alg_key_len+7)/8, x->aalg); + NLA_PUT(skb, XFRMA_ALG_AUTH, alg_len(x->aalg), x->aalg); if (x->ealg) - RTA_PUT(skb, XFRMA_ALG_CRYPT, - sizeof(*(x->ealg))+(x->ealg->alg_key_len+7)/8, x->ealg); + NLA_PUT(skb, XFRMA_ALG_CRYPT, alg_len(x->ealg), x->ealg); if (x->calg) - RTA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); + NLA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg); if (x->encap) - RTA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); + NLA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); - nlh->nlmsg_len = skb->tail - b; + nlmsg_end(skb, nlh); - NETLINK_CB(skb).dst_group = XFRMNLGRP_SA; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); -nlmsg_failure: -rtattr_failure: +nla_put_failure: kfree_skb(skb); return -1; } @@ -2224,20 +2059,28 @@ static int xfrm_send_state_notify(struct } +static inline size_t xfrm_acquire_msgsize(struct xfrm_state *x, + struct xfrm_policy *xp) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_user_acquire)) + + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr) + + nla_total_size(xfrm_user_sec_ctx_size(x->security)) + + userpolicy_type_attrsize(); +} + static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, struct xfrm_tmpl *xt, struct xfrm_policy *xp, int dir) { struct xfrm_user_acquire *ua; struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); __u32 seq = xfrm_get_acqseq(); - nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_ACQUIRE, - sizeof(*ua)); - ua = NLMSG_DATA(nlh); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_ACQUIRE, sizeof(*ua), 0); + if (nlh == NULL) + return -EMSGSIZE; + ua = nlmsg_data(nlh); memcpy(&ua->id, &x->id, sizeof(ua->id)); memcpy(&ua->saddr, &x->props.saddr, 
sizeof(ua->saddr)); memcpy(&ua->sel, &x->sel, sizeof(ua->sel)); @@ -2254,35 +2097,26 @@ static int build_acquire(struct sk_buff if (copy_to_user_policy_type(xp->type, skb) < 0) goto nlmsg_failure; - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; + return nlmsg_end(skb, nlh); nlmsg_failure: - nlmsg_trim(skb, b); - return -1; + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, struct xfrm_policy *xp, int dir) { struct sk_buff *skb; - size_t len; - len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); - len += NLMSG_SPACE(sizeof(struct xfrm_user_acquire)); - len += RTA_SPACE(xfrm_user_sec_ctx_size(x->security)); -#ifdef CONFIG_XFRM_SUB_POLICY - len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type)); -#endif - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(xfrm_acquire_msgsize(x, xp), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; if (build_acquire(skb, x, xt, xp, dir) < 0) BUG(); - NETLINK_CB(skb).dst_group = XFRMNLGRP_ACQUIRE; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC); } /* User gives us xfrm_user_policy_info followed by an array of 0 @@ -2344,18 +2178,26 @@ static struct xfrm_policy *xfrm_compile_ return xp; } +static inline size_t xfrm_polexpire_msgsize(struct xfrm_policy *xp) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_user_polexpire)) + + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr) + + nla_total_size(xfrm_user_sec_ctx_size(xp->security)) + + userpolicy_type_attrsize(); +} + static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, int dir, struct km_event *c) { struct xfrm_user_polexpire *upe; struct nlmsghdr *nlh; int hard = c->data.hard; - unsigned char *b = skb_tail_pointer(skb); - nlh = NLMSG_PUT(skb, c->pid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe)); - upe = NLMSG_DATA(nlh); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, c->pid, 0, 
XFRM_MSG_POLEXPIRE, sizeof(*upe), 0); + if (nlh == NULL) + return -EMSGSIZE; + upe = nlmsg_data(nlh); copy_to_user_policy(xp, &upe->pol, dir); if (copy_to_user_tmpl(xp, skb) < 0) goto nlmsg_failure; @@ -2365,34 +2207,25 @@ static int build_polexpire(struct sk_buf goto nlmsg_failure; upe->hard = !!hard; - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; + return nlmsg_end(skb, nlh); nlmsg_failure: - nlmsg_trim(skb, b); - return -1; + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) { struct sk_buff *skb; - size_t len; - len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); - len += NLMSG_SPACE(sizeof(struct xfrm_user_polexpire)); - len += RTA_SPACE(xfrm_user_sec_ctx_size(xp->security)); -#ifdef CONFIG_XFRM_SUB_POLICY - len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type)); -#endif - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(xfrm_polexpire_msgsize(xp), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; if (build_polexpire(skb, xp, dir, c) < 0) BUG(); - NETLINK_CB(skb).dst_group = XFRMNLGRP_EXPIRE; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); } static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c) @@ -2401,30 +2234,30 @@ static int xfrm_notify_policy(struct xfr struct xfrm_userpolicy_id *id; struct nlmsghdr *nlh; struct sk_buff *skb; - sk_buff_data_t b; - int len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); + int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); int headlen; headlen = sizeof(*p); if (c->event == XFRM_MSG_DELPOLICY) { - len += RTA_SPACE(headlen); + len += nla_total_size(headlen); headlen = sizeof(*id); } -#ifdef CONFIG_XFRM_SUB_POLICY - len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type)); -#endif - len += NLMSG_SPACE(headlen); + len += userpolicy_type_attrsize(); + len += 
NLMSG_ALIGN(headlen); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(len, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - b = skb->tail; - nlh = NLMSG_PUT(skb, c->pid, c->seq, c->event, headlen); + nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0); + if (nlh == NULL) + goto nlmsg_failure; - p = NLMSG_DATA(nlh); + p = nlmsg_data(nlh); if (c->event == XFRM_MSG_DELPOLICY) { - id = NLMSG_DATA(nlh); + struct nlattr *attr; + + id = nlmsg_data(nlh); memset(id, 0, sizeof(*id)); id->dir = dir; if (c->data.byid) @@ -2432,10 +2265,12 @@ static int xfrm_notify_policy(struct xfr else memcpy(&id->sel, &xp->selector, sizeof(id->sel)); - p = RTA_DATA(__RTA_PUT(skb, XFRMA_POLICY, sizeof(*p))); - } + attr = nla_reserve(skb, XFRMA_POLICY, sizeof(*p)); + if (attr == NULL) + goto nlmsg_failure; - nlh->nlmsg_flags = 0; + p = nla_data(attr); + } copy_to_user_policy(xp, p, dir); if (copy_to_user_tmpl(xp, skb) < 0) @@ -2443,13 +2278,11 @@ static int xfrm_notify_policy(struct xfr if (copy_to_user_policy_type(xp->type, skb) < 0) goto nlmsg_failure; - nlh->nlmsg_len = skb->tail - b; + nlmsg_end(skb, nlh); - NETLINK_CB(skb).dst_group = XFRMNLGRP_POLICY; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); nlmsg_failure: -rtattr_failure: kfree_skb(skb); return -1; } @@ -2458,28 +2291,20 @@ static int xfrm_notify_policy_flush(stru { struct nlmsghdr *nlh; struct sk_buff *skb; - sk_buff_data_t b; - int len = 0; -#ifdef CONFIG_XFRM_SUB_POLICY - len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type)); -#endif - len += NLMSG_LENGTH(0); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(userpolicy_type_attrsize(), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - b = skb->tail; - - nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0); + if (nlh == NULL) + goto nlmsg_failure; if 
(copy_to_user_policy_type(c->data.type, skb) < 0) goto nlmsg_failure; - nlh->nlmsg_len = skb->tail - b; + nlmsg_end(skb, nlh); - NETLINK_CB(skb).dst_group = XFRMNLGRP_POLICY; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); nlmsg_failure: kfree_skb(skb); @@ -2506,48 +2331,48 @@ static int xfrm_send_policy_notify(struc } +static inline size_t xfrm_report_msgsize(void) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_user_report)); +} + static int build_report(struct sk_buff *skb, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) { struct xfrm_user_report *ur; struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); - nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur)); - ur = NLMSG_DATA(nlh); - nlh->nlmsg_flags = 0; + nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur), 0); + if (nlh == NULL) + return -EMSGSIZE; + ur = nlmsg_data(nlh); ur->proto = proto; memcpy(&ur->sel, sel, sizeof(ur->sel)); if (addr) - RTA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr); + NLA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr); - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; + return nlmsg_end(skb, nlh); -nlmsg_failure: -rtattr_failure: - nlmsg_trim(skb, b); - return -1; +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int xfrm_send_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) { struct sk_buff *skb; - size_t len; - len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(struct xfrm_user_report))); - skb = alloc_skb(len, GFP_ATOMIC); + skb = nlmsg_new(xfrm_report_msgsize(), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; if (build_report(skb, proto, sel, addr) < 0) BUG(); - NETLINK_CB(skb).dst_group = XFRMNLGRP_REPORT; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC); + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC); } static struct xfrm_mgr netlink_mgr = { _