diff --git a/Documentation/ABI/testing/sysfs-ata b/Documentation/ABI/testing/sysfs-ata new file mode 100644 index 0000000..0a93215 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-ata @@ -0,0 +1,99 @@ +What: /sys/class/ata_... +Date: August 2008 +Contact: Gwendal Grignou +Description: + +Provide a place in sysfs for storing the ATA topology of the system. This allows +retrieving various information about ATA objects. + +Files under /sys/class/ata_port +------------------------------- + + For each port, a directory ataX is created where X is the ata_port_id of + the port. The device parent is the ata host device. + +idle_irq (read) + + Number of IRQs received by the port while idle [some ATA HBAs only]. + +nr_pmp_links (read) + + If a SATA Port Multiplier (PM) is connected, the number of links behind it. + +Files under /sys/class/ata_link +------------------------------- + + Behind each port, there is an ata_link. If there is a SATA PM in the + topology, 15 ata_link objects are created. + + If a link is behind a port, the directory name is linkX, where X is + the ata_port_id of the port. + If a link is behind a PM, its name is linkX.Y where X is the ata_port_id + of the parent port and Y the PM port. + +hw_sata_spd_limit + + Maximum speed supported by the connected SATA device. + +sata_spd_limit + + Maximum speed imposed by libata. + +sata_spd + + Current speed of the link [1.5 Gbps, 3 Gbps, ...]. + +Files under /sys/class/ata_device +--------------------------------- + + Behind each link, up to two ata devices are created. + The name of the directory is devX[.Y].Z where: + - X is the ata_port_id of the port where the device is connected, + - Y the port of the PM if any, and + - Z the device id: for PATA, there are usually 2 devices [0,1]; + only 1 for SATA. + +class + Device class. Can be "ata" for disk, "atapi" for packet device, + "pmp" for PM, or "none" if no device was found behind the link. + +dma_mode + + Transfer modes supported by the device when in DMA mode. + Mostly used by PATA devices. + +pio_mode + + Transfer modes supported by the device when in PIO mode. + Mostly used by PATA devices. + +xfer_mode + + Current transfer mode. + +id + + Cached result of the IDENTIFY command, as described in ATA8 7.16 and 7.17. + Only valid if the device is not a PM. + +gscr + + Cached result of the dump of the PM GSCR registers. + Valid registers are: + 0: SATA_PMP_GSCR_PROD_ID, + 1: SATA_PMP_GSCR_REV, + 2: SATA_PMP_GSCR_PORT_INFO, + 32: SATA_PMP_GSCR_ERROR, + 33: SATA_PMP_GSCR_ERROR_EN, + 64: SATA_PMP_GSCR_FEAT, + 96: SATA_PMP_GSCR_FEAT_EN, + 130: SATA_PMP_GSCR_SII_GPIO + Only valid if the device is a PM. + +spdn_cnt + + Number of times libata decided to lower the speed of the link due to errors. + +ering + + Formatted output of the error ring of the device.
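As a rough illustration of how this topology can be consumed, the following is a minimal userspace sketch reading one of the attributes described above. The link name "link1" is an assumption about the machine's topology; substitute whatever /sys/class/ata_link actually contains.

	/* Hedged sketch: read the current SATA link speed from sysfs.
	 * "link1" is an assumed directory name, not guaranteed to exist. */
	#include <stdio.h>

	int main(void)
	{
		char buf[64];
		FILE *f = fopen("/sys/class/ata_link/link1/sata_spd", "r");

		if (!f) {
			perror("fopen");
			return 1;
		}
		if (fgets(buf, sizeof(buf), f))
			printf("link1 sata_spd: %s", buf);
		fclose(f);
		return 0;
	}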
diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl index ecd35e9..feca075 100644 --- a/Documentation/DocBook/device-drivers.tmpl +++ b/Documentation/DocBook/device-drivers.tmpl @@ -46,7 +46,6 @@ Atomic and pointer manipulation !Iarch/x86/include/asm/atomic.h -!Iarch/x86/include/asm/unaligned.h Delaying, scheduling, and timer routines diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index a20c6f6..6899f47 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl @@ -57,7 +57,6 @@ String Conversions -!Ilib/vsprintf.c !Elib/vsprintf.c String Manipulation diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index 0b1a3f9..a0d479d 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl @@ -1961,6 +1961,12 @@ machines due to caching. + + Mutex API reference +!Iinclude/linux/mutex.h +!Ekernel/mutex.c + + Further reading diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl index e8473ea..b57a9ed 100644 --- a/Documentation/DocBook/tracepoint.tmpl +++ b/Documentation/DocBook/tracepoint.tmpl @@ -104,4 +104,9 @@ Block IO !Iinclude/trace/events/block.h + + + Workqueue +!Iinclude/trace/events/workqueue.h + diff --git a/Documentation/block/00-INDEX b/Documentation/block/00-INDEX index a406286..d111e3b 100644 --- a/Documentation/block/00-INDEX +++ b/Documentation/block/00-INDEX @@ -1,7 +1,5 @@ 00-INDEX - This file -barrier.txt - - I/O Barriers biodoc.txt - Notes on the Generic Block Layer Rewrite in Linux 2.5 capability.txt @@ -16,3 +14,5 @@ stat.txt - Block layer statistics in /sys/block//stat switching-sched.txt - Switching I/O schedulers at runtime +writeback_cache_control.txt + - Control of volatile write back caches diff --git a/Documentation/block/barrier.txt b/Documentation/block/barrier.txt deleted file mode 100644 index 2c2f24f..0000000 --- a/Documentation/block/barrier.txt +++ /dev/null @@ -1,261 +0,0 @@ -I/O Barriers -============ -Tejun Heo , July 22 2005 - -I/O barrier requests are used to guarantee ordering around the barrier -requests. Unless you're crazy enough to use disk drives for -implementing synchronization constructs (wow, sounds interesting...), -the ordering is meaningful only for write requests for things like -journal checkpoints. All requests queued before a barrier request -must be finished (made it to the physical medium) before the barrier -request is started, and all requests queued after the barrier request -must be started only after the barrier request is finished (again, -made it to the physical medium). - -In other words, I/O barrier requests have the following two properties. - -1. Request ordering - -Requests cannot pass the barrier request. Preceding requests are -processed before the barrier and following requests after. - -Depending on what features a drive supports, this can be done in one -of the following three ways. - -i. For devices which have queue depth greater than 1 (TCQ devices) and -support ordered tags, block layer can just issue the barrier as an -ordered request and the lower level driver, controller and drive -itself are responsible for making sure that the ordering constraint is -met. Most modern SCSI controllers/drives should support this. - -NOTE: SCSI ordered tag isn't currently used due to limitation in the - SCSI midlayer, see the following random notes section. - -ii. 
For devices which have queue depth greater than 1 but don't -support ordered tags, block layer ensures that the requests preceding -a barrier request finishes before issuing the barrier request. Also, -it defers requests following the barrier until the barrier request is -finished. Older SCSI controllers/drives and SATA drives fall in this -category. - -iii. Devices which have queue depth of 1. This is a degenerate case -of ii. Just keeping issue order suffices. Ancient SCSI -controllers/drives and IDE drives are in this category. - -2. Forced flushing to physical medium - -Again, if you're not gonna do synchronization with disk drives (dang, -it sounds even more appealing now!), the reason you use I/O barriers -is mainly to protect filesystem integrity when power failure or some -other events abruptly stop the drive from operating and possibly make -the drive lose data in its cache. So, I/O barriers need to guarantee -that requests actually get written to non-volatile medium in order. - -There are four cases, - -i. No write-back cache. Keeping requests ordered is enough. - -ii. Write-back cache but no flush operation. There's no way to -guarantee physical-medium commit order. This kind of devices can't to -I/O barriers. - -iii. Write-back cache and flush operation but no FUA (forced unit -access). We need two cache flushes - before and after the barrier -request. - -iv. Write-back cache, flush operation and FUA. We still need one -flush to make sure requests preceding a barrier are written to medium, -but post-barrier flush can be avoided by using FUA write on the -barrier itself. - - -How to support barrier requests in drivers ------------------------------------------- - -All barrier handling is done inside block layer proper. All low level -drivers have to are implementing its prepare_flush_fn and using one -the following two functions to indicate what barrier type it supports -and how to prepare flush requests. Note that the term 'ordered' is -used to indicate the whole sequence of performing barrier requests -including draining and flushing. - -typedef void (prepare_flush_fn)(struct request_queue *q, struct request *rq); - -int blk_queue_ordered(struct request_queue *q, unsigned ordered, - prepare_flush_fn *prepare_flush_fn); - -@q : the queue in question -@ordered : the ordered mode the driver/device supports -@prepare_flush_fn : this function should prepare @rq such that it - flushes cache to physical medium when executed - -For example, SCSI disk driver's prepare_flush_fn looks like the -following. - -static void sd_prepare_flush(struct request_queue *q, struct request *rq) -{ - memset(rq->cmd, 0, sizeof(rq->cmd)); - rq->cmd_type = REQ_TYPE_BLOCK_PC; - rq->timeout = SD_TIMEOUT; - rq->cmd[0] = SYNCHRONIZE_CACHE; - rq->cmd_len = 10; -} - -The following seven ordered modes are supported. The following table -shows which mode should be used depending on what features a -device/driver supports. In the leftmost column of table, -QUEUE_ORDERED_ prefix is omitted from the mode names to save space. - -The table is followed by description of each mode. Note that in the -descriptions of QUEUE_ORDERED_DRAIN*, '=>' is used whereas '->' is -used for QUEUE_ORDERED_TAG* descriptions. '=>' indicates that the -preceding step must be complete before proceeding to the next step. -'->' indicates that the next step can start as soon as the previous -step is issued. 
- - write-back cache ordered tag flush FUA ------------------------------------------------------------------------ -NONE yes/no N/A no N/A -DRAIN no no N/A N/A -DRAIN_FLUSH yes no yes no -DRAIN_FUA yes no yes yes -TAG no yes N/A N/A -TAG_FLUSH yes yes yes no -TAG_FUA yes yes yes yes - - -QUEUE_ORDERED_NONE - I/O barriers are not needed and/or supported. - - Sequence: N/A - -QUEUE_ORDERED_DRAIN - Requests are ordered by draining the request queue and cache - flushing isn't needed. - - Sequence: drain => barrier - -QUEUE_ORDERED_DRAIN_FLUSH - Requests are ordered by draining the request queue and both - pre-barrier and post-barrier cache flushings are needed. - - Sequence: drain => preflush => barrier => postflush - -QUEUE_ORDERED_DRAIN_FUA - Requests are ordered by draining the request queue and - pre-barrier cache flushing is needed. By using FUA on barrier - request, post-barrier flushing can be skipped. - - Sequence: drain => preflush => barrier - -QUEUE_ORDERED_TAG - Requests are ordered by ordered tag and cache flushing isn't - needed. - - Sequence: barrier - -QUEUE_ORDERED_TAG_FLUSH - Requests are ordered by ordered tag and both pre-barrier and - post-barrier cache flushings are needed. - - Sequence: preflush -> barrier -> postflush - -QUEUE_ORDERED_TAG_FUA - Requests are ordered by ordered tag and pre-barrier cache - flushing is needed. By using FUA on barrier request, - post-barrier flushing can be skipped. - - Sequence: preflush -> barrier - - -Random notes/caveats --------------------- - -* SCSI layer currently can't use TAG ordering even if the drive, -controller and driver support it. The problem is that SCSI midlayer -request dispatch function is not atomic. It releases queue lock and -switch to SCSI host lock during issue and it's possible and likely to -happen in time that requests change their relative positions. Once -this problem is solved, TAG ordering can be enabled. - -* Currently, no matter which ordered mode is used, there can be only -one barrier request in progress. All I/O barriers are held off by -block layer until the previous I/O barrier is complete. This doesn't -make any difference for DRAIN ordered devices, but, for TAG ordered -devices with very high command latency, passing multiple I/O barriers -to low level *might* be helpful if they are very frequent. Well, this -certainly is a non-issue. I'm writing this just to make clear that no -two I/O barrier is ever passed to low-level driver. - -* Completion order. Requests in ordered sequence are issued in order -but not required to finish in order. Barrier implementation can -handle out-of-order completion of ordered sequence. IOW, the requests -MUST be processed in order but the hardware/software completion paths -are allowed to reorder completion notifications - eg. current SCSI -midlayer doesn't preserve completion order during error handling. - -* Requeueing order. Low-level drivers are free to requeue any request -after they removed it from the request queue with -blkdev_dequeue_request(). As barrier sequence should be kept in order -when requeued, generic elevator code takes care of putting requests in -order around barrier. See blk_ordered_req_seq() and -ELEVATOR_INSERT_REQUEUE handling in __elv_add_request() for details. - -Note that block drivers must not requeue preceding requests while -completing latter requests in an ordered sequence. Currently, no -error checking is done against this. - -* Error handling. 
Currently, block layer will report error to upper -layer if any of requests in an ordered sequence fails. Unfortunately, -this doesn't seem to be enough. Look at the following request flow. -QUEUE_ORDERED_TAG_FLUSH is in use. - - [0] [1] [2] [3] [pre] [barrier] [post] < [4] [5] [6] ... > - still in elevator - -Let's say request [2], [3] are write requests to update file system -metadata (journal or whatever) and [barrier] is used to mark that -those updates are valid. Consider the following sequence. - - i. Requests [0] ~ [post] leaves the request queue and enters - low-level driver. - ii. After a while, unfortunately, something goes wrong and the - drive fails [2]. Note that any of [0], [1] and [3] could have - completed by this time, but [pre] couldn't have been finished - as the drive must process it in order and it failed before - processing that command. - iii. Error handling kicks in and determines that the error is - unrecoverable and fails [2], and resumes operation. - iv. [pre] [barrier] [post] gets processed. - v. *BOOM* power fails - -The problem here is that the barrier request is *supposed* to indicate -that filesystem update requests [2] and [3] made it safely to the -physical medium and, if the machine crashes after the barrier is -written, filesystem recovery code can depend on that. Sadly, that -isn't true in this case anymore. IOW, the success of a I/O barrier -should also be dependent on success of some of the preceding requests, -where only upper layer (filesystem) knows what 'some' is. - -This can be solved by implementing a way to tell the block layer which -requests affect the success of the following barrier request and -making lower lever drivers to resume operation on error only after -block layer tells it to do so. - -As the probability of this happening is very low and the drive should -be faulty, implementing the fix is probably an overkill. But, still, -it's there. - -* In previous drafts of barrier implementation, there was fallback -mechanism such that, if FUA or ordered TAG fails, less fancy ordered -mode can be selected and the failed barrier request is retried -automatically. The rationale for this feature was that as FUA is -pretty new in ATA world and ordered tag was never used widely, there -could be devices which report to support those features but choke when -actually given such requests. - - This was removed for two reasons 1. it's an overkill 2. it's -impossible to implement properly when TAG ordering is used as low -level drivers resume after an error automatically. If it's ever -needed adding it back and modifying low level drivers accordingly -shouldn't be difficult. diff --git a/Documentation/block/cfq-iosched.txt b/Documentation/block/cfq-iosched.txt new file mode 100644 index 0000000..e578fee --- /dev/null +++ b/Documentation/block/cfq-iosched.txt @@ -0,0 +1,45 @@ +CFQ ioscheduler tunables +======================== + +slice_idle +---------- +This specifies how long CFQ should idle for next request on certain cfq queues +(for sequential workloads) and service trees (for random workloads) before +queue is expired and CFQ selects next queue to dispatch from. + +By default slice_idle is a non-zero value. That means by default we idle on +queues/service trees. This can be very helpful on highly seeky media like +single spindle SATA/SAS disks where we can cut down on overall number of +seeks and see improved throughput. 
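As discussed below, setting slice_idle to 0 disables this idling; a minimal sketch for poking the tunable at runtime follows. The disk name "sda" is an assumption, and the file only exists while CFQ is the active scheduler for that queue.

	/* Hedged sketch: disable CFQ idling by writing 0 to slice_idle.
	 * "sda" is an assumed disk name; the path is only present when
	 * the cfq scheduler is selected for that device's queue. */
	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/block/sda/queue/iosched/slice_idle", "w");

		if (!f) {
			perror("fopen");
			return 1;
		}
		fputs("0\n", f);
		return fclose(f) ? 1 : 0;
	}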
+ +Setting slice_idle to 0 will remove all the idling on queues/service tree +level and one should see an overall improved throughput on faster storage +devices like multiple SATA/SAS disks in hardware RAID configuration. The +downside is that the isolation provided from WRITES also goes down and the +notion of IO priority becomes weaker. + +So depending on storage and workload, it might be useful to set slice_idle=0. +In general I think for SATA/SAS disks and software RAID of SATA/SAS disks +keeping slice_idle enabled should be useful. For any configuration where +there are multiple spindles behind a single LUN (host based hardware RAID +controller or storage arrays), setting slice_idle=0 might result in better +throughput and acceptable latencies. + +CFQ IOPS Mode for group scheduling +=================================== +The basic CFQ design is to provide priority based time slices. A higher +priority process gets a bigger time slice and a lower priority process gets +a smaller time slice. Measuring time becomes harder if storage is fast and +supports NCQ, and it would be better to dispatch multiple requests from +multiple cfq queues in the request queue at a time. In such a scenario, it is +not possible to measure the time consumed by a single queue accurately. + +What is possible though is to measure the number of requests dispatched from +a single queue and also allow dispatch from multiple cfq queues at the same +time. This effectively provides fairness in terms of IOPS (IO operations per +second). + +If one sets slice_idle=0 and the storage supports NCQ, CFQ internally switches +to IOPS mode and starts providing fairness in terms of the number of requests +dispatched. Note that this mode switching takes effect only for group +scheduling. For non-cgroup users nothing should change. diff --git a/Documentation/block/writeback_cache_control.txt b/Documentation/block/writeback_cache_control.txt new file mode 100644 index 0000000..83407d3 --- /dev/null +++ b/Documentation/block/writeback_cache_control.txt @@ -0,0 +1,86 @@ + +Explicit volatile write back cache control +========================================== + +Introduction +------------ + +Many storage devices, especially in the consumer market, come with volatile +write back caches. That means the devices signal I/O completion to the +operating system before data has actually hit the non-volatile storage. This +behavior obviously speeds up various workloads, but it means the operating +system needs to force data out to the non-volatile storage when it performs +a data integrity operation like fsync, sync or an unmount. + +The Linux block layer provides two simple mechanisms that let filesystems +control the caching behavior of the storage device. These mechanisms are +a forced cache flush, and the Force Unit Access (FUA) flag for requests. + + +Explicit cache flushes +---------------------- + +The REQ_FLUSH flag can be ORed into the r/w flags of a bio submitted from +the filesystem and will make sure the volatile cache of the storage device +has been flushed before the actual I/O operation is started. This explicitly +guarantees that previously completed write requests are on non-volatile +storage before the flagged bio starts. In addition the REQ_FLUSH flag can be +set on an otherwise empty bio structure, which causes only an explicit cache +flush without any dependent I/O. It is recommended to use +the blkdev_issue_flush() helper for a pure cache flush.
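As a sketch of how a filesystem might apply these flags (not the definitive API of any particular filesystem; the function name and the elided bio setup are illustrative, and the rw-flags calling convention of submit_bio() from kernels of this era is assumed):

	/* Illustrative only: tag a journal-commit bio so that earlier
	 * completed writes are flushed first (REQ_FLUSH) and the bio's
	 * own data is durable before completion is signaled (REQ_FUA). */
	#include <linux/fs.h>
	#include <linux/bio.h>
	#include <linux/blk_types.h>

	static void submit_commit_record(struct bio *bio)
	{
		/* bio device, sector, pages and end_io are assumed set up */
		submit_bio(WRITE | REQ_FLUSH | REQ_FUA, bio);
	}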
+ + +Forced Unit Access +------------------ + +The REQ_FUA flag can be ORed into the r/w flags of a bio submitted from the +filesystem and will make sure that I/O completion for this request is only +signaled after the data has been committed to non-volatile storage. + + +Implementation details for filesystems +-------------------------------------- + +Filesystems can simply set the REQ_FLUSH and REQ_FUA bits and do not have to +worry about whether the underlying devices need any explicit cache flushing or how +the Forced Unit Access is implemented. The REQ_FLUSH and REQ_FUA flags +may both be set on a single bio. + + +Implementation details for make_request_fn based block drivers +--------------------------------------------------------------- + +These drivers will always see the REQ_FLUSH and REQ_FUA bits as they sit +directly below the submit_bio interface. For remapping drivers the REQ_FUA +bits need to be propagated to underlying devices, and a global flush needs +to be implemented for bios with the REQ_FLUSH bit set. For real device +drivers that do not have a volatile cache the REQ_FLUSH and REQ_FUA bits +on non-empty bios can simply be ignored, and REQ_FLUSH requests without +data can be completed successfully without doing any work. Drivers for +devices with volatile caches need to implement the support for these +flags themselves without any help from the block layer. + + +Implementation details for request_fn based block drivers +---------------------------------------------------------- + +For devices that do not support volatile write caches there is no driver +support required; the block layer completes empty REQ_FLUSH requests before +entering the driver and strips off the REQ_FLUSH and REQ_FUA bits from +requests that have a payload. For devices with volatile write caches the +driver needs to tell the block layer that it supports flushing caches by +doing: + + blk_queue_flush(sdkp->disk->queue, REQ_FLUSH); + +and handle empty REQ_FLUSH requests in its prep_fn/request_fn. Note that +REQ_FLUSH requests with a payload are automatically turned into a sequence +of an empty REQ_FLUSH request followed by the actual write by the block +layer. For devices that also support the FUA bit the block layer needs +to be told to pass through the REQ_FUA bit using: + + blk_queue_flush(sdkp->disk->queue, REQ_FLUSH | REQ_FUA); + +and the driver must handle write requests that have the REQ_FUA bit set +in prep_fn/request_fn. If the FUA bit is not natively supported the block +layer turns it into an empty REQ_FLUSH request after the actual write. diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt index 48e0b21..d6da611 100644 --- a/Documentation/cgroups/blkio-controller.txt +++ b/Documentation/cgroups/blkio-controller.txt @@ -8,12 +8,17 @@ both at leaf nodes as well as at intermediate nodes in a storage hierarchy. Plan is to use the same cgroup based management interface for blkio controller and based on user options switch IO policies in the background. -In the first phase, this patchset implements proportional weight time based -division of disk policy. It is implemented in CFQ. Hence this policy takes -effect only on leaf nodes when CFQ is being used. +Currently two IO control policies are implemented. The first one is a proportional +weight time based division of disk policy. It is implemented in CFQ. Hence +this policy takes effect only on leaf nodes when CFQ is being used.
The second +one is a throttling policy which can be used to specify upper IO rate limits +on devices. This policy is implemented in the generic block layer and can be +used on leaf nodes as well as higher level logical devices like device mapper. HOWTO ===== +Proportional Weight division of bandwidth +----------------------------------------- You can do a very simple testing of running two dd threads in two different cgroups. Here is what you can do. @@ -55,6 +60,35 @@ cgroups. Here is what you can do. group dispatched to the disk. We provide fairness in terms of disk time, so ideally io.disk_time of cgroups should be in proportion to the weight. +Throttling/Upper Limit policy +----------------------------- +- Enable Block IO controller + CONFIG_BLK_CGROUP=y + +- Enable throttling in block layer + CONFIG_BLK_DEV_THROTTLING=y + +- Mount blkio controller + mount -t cgroup -o blkio none /cgroup/blkio + +- Specify a bandwidth rate on a particular device for the root group. The + format for the policy is "<major>:<minor> <bytes_per_second>". + + echo "8:16 1048576" > /cgroup/blkio/blkio.read_bps_device + + The above will put a limit of 1MB/second on reads happening for the root + group on the device having major/minor number 8:16. + +- Run dd to read a file and see if the rate is throttled to 1MB/s or not. + + # dd if=/mnt/common/zerofile of=/dev/null bs=4K count=1024 iflag=direct + 1024+0 records in + 1024+0 records out + 4194304 bytes (4.2 MB) copied, 4.0001 s, 1.0 MB/s + + Limits for writes can be put using the blkio.write_bps_device file. + Various user visible config options =================================== CONFIG_BLK_CGROUP @@ -68,8 +102,13 @@ CONFIG_CFQ_GROUP_IOSCHED - Enables group scheduling in CFQ. Currently only 1 level of group creation is allowed. +CONFIG_BLK_DEV_THROTTLING + - Enable block device throttling support in block layer. + Details of cgroup files ======================= +Proportional weight policy files +-------------------------------- - blkio.weight - Specifies per cgroup weight. This is default weight of the group on all the devices until and unless overridden by per device rule. @@ -210,6 +249,67 @@ Details of cgroup files and minor number of the device and third field specifies the number of times a group was dequeued from a particular device. +Throttling/Upper limit policy files +----------------------------------- +- blkio.throttle.read_bps_device + - Specifies the upper limit on READ rate from the device. The IO rate is + specified in bytes per second. Rules are per device. Following is + the format. + + echo "<major>:<minor> <bytes_per_second>" > /cgrp/blkio.read_bps_device + +- blkio.throttle.write_bps_device + - Specifies the upper limit on WRITE rate to the device. The IO rate is + specified in bytes per second. Rules are per device. Following is + the format. + + echo "<major>:<minor> <bytes_per_second>" > /cgrp/blkio.write_bps_device + +- blkio.throttle.read_iops_device + - Specifies the upper limit on READ rate from the device. The IO rate is + specified in IOs per second. Rules are per device. Following is + the format. + + echo "<major>:<minor> <io_per_second>" > /cgrp/blkio.read_iops_device + +- blkio.throttle.write_iops_device + - Specifies the upper limit on WRITE rate to the device. The IO rate is + specified in IOs per second. Rules are per device. Following is + the format. + + echo "<major>:<minor> <io_per_second>" > /cgrp/blkio.write_iops_device + +Note: If both BW and IOPS rules are specified for a device, then IO is + subjected to both the constraints. + +- blkio.throttle.io_serviced + - Number of IOs (bios) completed to/from the disk by the group (as + seen by the throttling policy).
These are further divided by the type + of operation - read or write, sync or async. The first two fields specify + the major and minor number of the device, the third field specifies the + operation type and the fourth field specifies the number of IOs. + + blkio.io_serviced does accounting as seen by CFQ and the counts are in + number of requests (struct request). On the other hand, + blkio.throttle.io_serviced counts the number of IOs in terms of the + number of bios as seen by the throttling policy. These bios can later be + merged by the elevator, so the total number of completed requests can be + smaller. + +- blkio.throttle.io_service_bytes + - Number of bytes transferred to/from the disk by the group. These + are further divided by the type of operation - read or write, sync + or async. The first two fields specify the major and minor number of the + device, the third field specifies the operation type and the fourth field + specifies the number of bytes. + + These numbers should be roughly the same as blkio.io_service_bytes as + updated by CFQ. The difference between the two is that + blkio.io_service_bytes will not be updated if CFQ is not operating + on the request queue. + +Common files among various policies +----------------------------------- - blkio.reset_stats - Writing an int to this file will result in resetting all the stats for that cgroup. @@ -217,6 +317,7 @@ Details of cgroup files CFQ sysfs tunable ================= /sys/block/<disk>/queue/iosched/group_isolation +----------------------------------------------- If group_isolation=1, it provides stronger isolation between groups at the expense of throughput. By default group_isolation is 0. In general that @@ -243,6 +344,33 @@ By default one should run with group_isolation=0. If that is not sufficient and one wants stronger isolation between groups, then set group_isolation=1 but this will come at cost of reduced throughput. +/sys/block/<disk>/queue/iosched/slice_idle +------------------------------------------ +On faster hardware CFQ can be slow, especially with sequential workloads. +This happens because CFQ idles on a single queue and a single queue might not +drive deep enough request queue depths to keep the storage busy. In such scenarios +one can try setting slice_idle=0 and that would switch CFQ to IOPS +(IO operations per second) mode on NCQ supporting hardware. + +That means CFQ will not idle between the cfq queues of a cfq group and hence be +able to drive a higher queue depth and achieve better throughput. That also +means that cfq provides fairness among groups in terms of IOPS and not in +terms of disk time. + +/sys/block/<disk>/queue/iosched/group_idle +------------------------------------------ +If one disables idling on individual cfq queues and cfq service trees by +setting slice_idle=0, group_idle kicks in. That means CFQ will still idle +on the group in an attempt to provide fairness among groups. + +By default group_idle is the same as slice_idle and does not do anything if +slice_idle is enabled. + +You can experience an overall throughput drop if you have created multiple +groups and put applications in those groups that are not driving enough +IO to keep the disk busy. In that case set group_idle=0, and CFQ will not idle +on individual groups and throughput should improve. + What works ========== - Currently only sync IO queues are supported.
All the buffered writes are diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt index d96a6db..9633da0 100644 --- a/Documentation/gpio.txt +++ b/Documentation/gpio.txt @@ -109,17 +109,19 @@ use numbers 2000-2063 to identify GPIOs in a bank of I2C GPIO expanders. If you want to initialize a structure with an invalid GPIO number, use some negative number (perhaps "-EINVAL"); that will never be valid. To -test if a number could reference a GPIO, you may use this predicate: +test if such a number from such a structure could reference a GPIO, you +may use this predicate: int gpio_is_valid(int number); A number that's not valid will be rejected by calls which may request or free GPIOs (see below). Other numbers may also be rejected; for -example, a number might be valid but unused on a given board. - -Whether a platform supports multiple GPIO controllers is currently a -platform-specific implementation issue. +example, a number might be valid but temporarily unused on a given board. +Whether a platform supports multiple GPIO controllers is a platform-specific +implementation issue, as are the questions of whether that support can leave "holes" +in the space of GPIO numbers, and whether new controllers can be added at runtime. +Such issues can affect things including whether adjacent GPIO numbers are both valid. Using GPIOs ----------- @@ -480,12 +482,16 @@ To support this framework, a platform's Kconfig will "select" either ARCH_REQUIRE_GPIOLIB or ARCH_WANT_OPTIONAL_GPIOLIB and arrange that its includes and defines three functions: gpio_get_value(), gpio_set_value(), and gpio_cansleep(). -They may also want to provide a custom value for ARCH_NR_GPIOS. -ARCH_REQUIRE_GPIOLIB means that the gpio-lib code will always get compiled +It may also provide a custom value for ARCH_NR_GPIOS, so that it better +reflects the number of GPIOs in actual use on that platform, without +wasting static table space. (It should count both built-in/SoC GPIOs and +also ones on GPIO expanders.) + +ARCH_REQUIRE_GPIOLIB means that the gpiolib code will always get compiled into the kernel on that architecture. -ARCH_WANT_OPTIONAL_GPIOLIB means the gpio-lib code defaults to off and the user +ARCH_WANT_OPTIONAL_GPIOLIB means the gpiolib code defaults to off and the user can enable it and build it into the kernel optionally. If neither of these options are selected, the platform does not support diff --git a/Documentation/hwmon/sysfs-interface b/Documentation/hwmon/sysfs-interface index ff45d1f..48ceabe 100644 --- a/Documentation/hwmon/sysfs-interface +++ b/Documentation/hwmon/sysfs-interface @@ -91,12 +91,11 @@ name The chip name. I2C devices get this attribute created automatically. RO -update_rate The rate at which the chip will update readings. +update_interval The interval at which the chip will update readings. Unit: millisecond RW - Some devices have a variable update rate. This attribute - can be used to change the update rate to the desired - frequency. + Some devices have a variable update rate or interval. + This attribute can be used to change it to the desired value. ************ diff --git a/Documentation/kernel-doc-nano-HOWTO.txt b/Documentation/kernel-doc-nano-HOWTO.txt index 27a52b3..3d8a977 100644 --- a/Documentation/kernel-doc-nano-HOWTO.txt +++ b/Documentation/kernel-doc-nano-HOWTO.txt @@ -345,5 +345,10 @@ documentation, in <filename>, for the functions listed. section titled <section title>
from <filename>. Spaces are allowed in <section title>; do not quote the <section title>
. +!C is replaced by nothing, but makes the tools check that +all DOC: sections and documented functions, symbols, etc. are used. +This makes sense to use when you use !F/!P only and want to verify +that all documentation is included. + Tim. */ diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index f084af0..8dd7248 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1974,15 +1974,18 @@ and is between 256 and 4096 characters. It is defined in the file force Enable ASPM even on devices that claim not to support it. WARNING: Forcing ASPM on may cause system lockups. + pcie_ports= [PCIE] PCIe ports handling: + auto Ask the BIOS whether or not to use native PCIe services + associated with PCIe ports (PME, hot-plug, AER). Use + them only if that is allowed by the BIOS. + native Use native PCIe services associated with PCIe ports + unconditionally. + compat Treat PCIe ports as PCI-to-PCI bridges, disable the PCIe + ports driver. + pcie_pme= [PCIE,PM] Native PCIe PME signaling options: - Format: {auto|force}[,nomsi] - auto Use native PCIe PME signaling if the BIOS allows the - kernel to control PCIe config registers of root ports. - force Use native PCIe PME signaling even if the BIOS refuses - to allow the kernel to control the relevant PCIe config - registers. nomsi Do not use MSI for native PCIe PME signaling (this makes - all PCIe root ports use INTx for everything). + all PCIe root ports use INTx for all services). pcmv= [HW,PCMCIA] BadgePAD 4 diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 8a6a8c6..dc73bc5 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -1640,15 +1640,6 @@ static void blk_request(struct virtqueue *vq) off = out->sector * 512; /* - * The block device implements "barriers", where the Guest indicates - * that it wants all previous writes to occur before this write. We - * don't have a way of asking our kernel to do a barrier, so we just - * synchronize all the data in the file. Pretty poor, no? - */ - if (out->type & VIRTIO_BLK_T_BARRIER) - fdatasync(vblk->fd); - - /* * In general the virtio block driver is allowed to try SCSI commands. * It'd be nice if we supported eject, for example, but we don't. */ @@ -1680,6 +1671,13 @@ static void blk_request(struct virtqueue *vq) /* Die, bad Guest, die. */ errx(1, "Write past end %llu+%u", off, ret); } + + wlen = sizeof(*in); + *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR); + } else if (out->type & VIRTIO_BLK_T_FLUSH) { + /* Flush */ + ret = fdatasync(vblk->fd); + verbose("FLUSH fdatasync: %i\n", ret); wlen = sizeof(*in); *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR); } else { @@ -1703,15 +1701,6 @@ static void blk_request(struct virtqueue *vq) } } - /* - * OK, so we noted that it was pretty poor to use an fdatasync as a - * barrier. But Christoph Hellwig points out that we need a sync - * *afterwards* as well: "Barriers specify no reordering to the front - * or the back." And Jens Axboe confirmed it, so here we are: - */ - if (out->type & VIRTIO_BLK_T_BARRIER) - fdatasync(vblk->fd); - /* Finished that request. */ add_used(vq, head, wlen); } @@ -1736,8 +1725,8 @@ static void setup_block_file(const char *filename) vblk->fd = open_or_die(filename, O_RDWR|O_LARGEFILE); vblk->len = lseek64(vblk->fd, 0, SEEK_END); - /* We support barriers. */ - add_feature(dev, VIRTIO_BLK_F_BARRIER); + /* We support FLUSH. 
*/ + add_feature(dev, VIRTIO_BLK_F_FLUSH); /* Tell Guest how many sectors this device has. */ conf.capacity = cpu_to_le64(vblk->len / 512); diff --git a/Documentation/mutex-design.txt b/Documentation/mutex-design.txt index c91ccc0..38c10fd 100644 --- a/Documentation/mutex-design.txt +++ b/Documentation/mutex-design.txt @@ -9,7 +9,7 @@ firstly, there's nothing wrong with semaphores. But if the simpler mutex semantics are sufficient for your code, then there are a couple of advantages of mutexes: - - 'struct mutex' is smaller on most architectures: .e.g on x86, + - 'struct mutex' is smaller on most architectures: E.g. on x86, 'struct semaphore' is 20 bytes, 'struct mutex' is 16 bytes. A smaller structure size means less RAM footprint, and better CPU-cache utilization. @@ -136,3 +136,4 @@ the APIs of 'struct mutex' have been streamlined: void mutex_lock_nested(struct mutex *lock, unsigned int subclass); int mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass); + int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); diff --git a/Documentation/power/regulator/overview.txt b/Documentation/power/regulator/overview.txt index 9363e05..8ed1758 100644 --- a/Documentation/power/regulator/overview.txt +++ b/Documentation/power/regulator/overview.txt @@ -13,7 +13,7 @@ regulators (where voltage output is controllable) and current sinks (where current limit is controllable). (C) 2008 Wolfson Microelectronics PLC. -Author: Liam Girdwood +Author: Liam Girdwood Nomenclature diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt index ce46fa1..37c6aad 100644 --- a/Documentation/sound/alsa/HD-Audio-Models.txt +++ b/Documentation/sound/alsa/HD-Audio-Models.txt @@ -296,6 +296,7 @@ Conexant 5051 Conexant 5066 ============= laptop Basic Laptop config (default) + hp-laptop HP laptops, e g G60 dell-laptop Dell laptops dell-vostro Dell Vostro olpc-xo-1_5 OLPC XO 1.5 diff --git a/Documentation/workqueue.txt b/Documentation/workqueue.txt new file mode 100644 index 0000000..e4498a2 --- /dev/null +++ b/Documentation/workqueue.txt @@ -0,0 +1,380 @@ + +Concurrency Managed Workqueue (cmwq) + +September, 2010 Tejun Heo + Florian Mickler + +CONTENTS + +1. Introduction +2. Why cmwq? +3. The Design +4. Application Programming Interface (API) +5. Example Execution Scenarios +6. Guidelines + + +1. Introduction + +There are many cases where an asynchronous process execution context +is needed and the workqueue (wq) API is the most commonly used +mechanism for such cases. + +When such an asynchronous execution context is needed, a work item +describing which function to execute is put on a queue. An +independent thread serves as the asynchronous execution context. The +queue is called workqueue and the thread is called worker. + +While there are work items on the workqueue the worker executes the +functions associated with the work items one after the other. When +there is no work item left on the workqueue the worker becomes idle. +When a new work item gets queued, the worker begins executing again. + + +2. Why cmwq? + +In the original wq implementation, a multi threaded (MT) wq had one +worker thread per CPU and a single threaded (ST) wq had one worker +thread system-wide. A single MT wq needed to keep around the same +number of workers as the number of CPUs. The kernel grew a lot of MT +wq users over the years and with the number of CPU cores continuously +rising, some systems saturated the default 32k PID space just booting +up. 
+ +Although MT wq wasted a lot of resources, the level of concurrency +provided was unsatisfactory. The limitation was common to both ST and +MT wq albeit less severe on MT. Each wq maintained its own separate +worker pool. An MT wq could provide only one execution context per CPU +while an ST wq provided one for the whole system. Work items had to compete for +those very limited execution contexts leading to various problems +including proneness to deadlocks around the single execution context. + +The tension between the provided level of concurrency and resource +usage also forced its users to make unnecessary tradeoffs like libata +choosing to use ST wq for polling PIOs and accepting an unnecessary +limitation that no two polling PIOs can progress at the same time. As +MT wq don't provide much better concurrency, users that require a +higher level of concurrency, like async or fscache, had to implement +their own thread pool. + +Concurrency Managed Workqueue (cmwq) is a reimplementation of wq with +a focus on the following goals. + +* Maintain compatibility with the original workqueue API. + +* Use per-CPU unified worker pools shared by all wq to provide + a flexible level of concurrency on demand without wasting a lot of + resources. + +* Automatically regulate the worker pools and level of concurrency so that + the API users don't need to worry about such details. + + +3. The Design + +In order to ease the asynchronous execution of functions a new +abstraction, the work item, is introduced. + +A work item is a simple struct that holds a pointer to the function +that is to be executed asynchronously. Whenever a driver or subsystem +wants a function to be executed asynchronously it has to set up a work +item pointing to that function and queue that work item on a +workqueue. + +Special purpose threads, called worker threads, execute the functions +off of the queue, one after the other. If no work is queued, the +worker threads become idle. These worker threads are managed in +so-called thread-pools. + +The cmwq design differentiates between the user-facing workqueues that +subsystems and drivers queue work items on and the backend mechanism +which manages the thread-pools and processes the queued work items. + +The backend is called gcwq. There is one gcwq for each possible CPU +and one gcwq to serve work items queued on unbound workqueues. + +Subsystems and drivers can create and queue work items through special +workqueue API functions as they see fit. They can influence some +aspects of the way the work items are executed by setting flags on the +workqueue they are putting the work item on. These flags include +things like CPU locality, reentrancy, concurrency limits and more. To +get a detailed overview refer to the API description of +alloc_workqueue() below. + +When a work item is queued to a workqueue, the target gcwq is +determined according to the queue parameters and workqueue attributes +and the work item is appended to the shared worklist of the gcwq. For example, unless +specifically overridden, a work item of a bound workqueue will be +queued on the worklist of exactly the gcwq that is associated with the +CPU the issuer is running on. + +For any worker pool implementation, managing the concurrency level +(how many execution contexts are active) is an important issue. cmwq +tries to keep the concurrency at a minimal but sufficient level. +Minimal to save resources and sufficient in that the system is used at +its full capacity.
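To tie the design back to the API it serves, here is a minimal sketch of a work item's life cycle; all names are illustrative, and alloc_workqueue() is described in the API section below:

	#include <linux/init.h>
	#include <linux/kernel.h>
	#include <linux/workqueue.h>

	static void my_work_fn(struct work_struct *work)
	{
		pr_info("running asynchronously in a worker thread\n");
	}

	static DECLARE_WORK(my_work, my_work_fn);
	static struct workqueue_struct *my_wq;

	static int __init my_init(void)
	{
		/* default flags and @max_active; see section 4 below */
		my_wq = alloc_workqueue("my_wq", 0, 0);
		if (!my_wq)
			return -ENOMEM;
		/* appended to the worklist of the gcwq chosen as described above */
		queue_work(my_wq, &my_work);
		return 0;
	}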
+ +Each gcwq bound to an actual CPU implements concurrency management by +hooking into the scheduler. The gcwq is notified whenever an active +worker wakes up or sleeps and keeps track of the number of currently +runnable workers. Generally, work items are not expected to +hog a CPU and consume many cycles. That means maintaining just enough +concurrency to prevent work processing from stalling should be +optimal. As long as there are one or more runnable workers on the +CPU, the gcwq doesn't start execution of a new work item, but, when the +last running worker goes to sleep, it immediately schedules a new +worker so that the CPU doesn't sit idle while there are pending work +items. This allows using a minimal number of workers without losing +execution bandwidth. + +Keeping idle workers around doesn't cost anything other than the memory +space for the kthreads, so cmwq holds onto idle ones for a while before +killing them. + +For an unbound wq, the above concurrency management doesn't apply and +the gcwq for the pseudo unbound CPU tries to start executing all work +items as soon as possible. The responsibility of regulating the +concurrency level is on the users. There is also a flag to mark a +bound wq to ignore the concurrency management. Please refer to the +API section for details. + +The forward progress guarantee relies on workers being created when +more execution contexts are necessary, which in turn is guaranteed +through the use of rescue workers. All work items that might be used +on code paths that handle memory reclaim are required to be queued on +wq's that have a rescue-worker reserved for execution under memory +pressure. Otherwise it is possible that the thread-pool deadlocks waiting +for execution contexts to free up. + + +4. Application Programming Interface (API) + +alloc_workqueue() allocates a wq. The original create_*workqueue() +functions are deprecated and scheduled for removal. alloc_workqueue() +takes three arguments - @name, @flags and @max_active. @name is the +name of the wq and is also used as the name of the rescuer thread if +there is one. + +A wq no longer manages execution resources but serves as a domain for +forward progress guarantee, flush and work item attributes. @flags +and @max_active control how work items are assigned execution +resources, scheduled and executed. + +@flags: + + WQ_NON_REENTRANT + + By default, a wq guarantees non-reentrance only on the same + CPU. A work item may not be executed concurrently on the same + CPU by multiple workers but is allowed to be executed + concurrently on multiple CPUs. This flag makes sure + non-reentrance is enforced across all CPUs. Work items queued + to a non-reentrant wq are guaranteed to be executed by at most + one worker system-wide at any given time. + + WQ_UNBOUND + + Work items queued to an unbound wq are served by a special + gcwq which hosts workers which are not bound to any specific + CPU. This makes the wq behave as a simple execution context + provider without concurrency management. The unbound gcwq + tries to start execution of work items as soon as possible. + An unbound wq sacrifices locality but is useful for the following + cases. + + * Wide fluctuation in the concurrency level requirement is + expected and using a bound wq may end up creating a large number + of mostly unused workers across different CPUs as the issuer + hops through different CPUs. + + * Long running CPU intensive workloads which can be better + managed by the system scheduler.
+ + WQ_FREEZEABLE + + A freezeable wq participates in the freeze phase of the system + suspend operations. Work items on the wq are drained and no + new work item starts execution until thawed. + + WQ_RESCUER + + All wq which might be used in the memory reclaim paths _MUST_ + have this flag set. This reserves one worker exclusively for + the execution of this wq under memory pressure. + + WQ_HIGHPRI + + Work items of a highpri wq are queued at the head of the + worklist of the target gcwq and start execution regardless of + the current concurrency level. In other words, highpri work + items will always start execution as soon as an execution + resource is available. + + Ordering among highpri work items is preserved - a highpri + work item queued after another highpri work item will start + execution after the earlier highpri work item starts. + + Although highpri work items are not held back by other + runnable work items, they still contribute to the concurrency + level. Highpri work items in a runnable state will prevent + non-highpri work items from starting execution. + + This flag is meaningless for unbound wq. + + WQ_CPU_INTENSIVE + + Work items of a CPU intensive wq do not contribute to the + concurrency level. In other words, runnable CPU intensive + work items will not prevent other work items from starting + execution. This is useful for bound work items which are + expected to hog CPU cycles so that their execution is + regulated by the system scheduler. + + Although CPU intensive work items don't contribute to the + concurrency level, the start of their execution is still + regulated by the concurrency management and runnable + non-CPU-intensive work items can delay execution of CPU + intensive work items. + + This flag is meaningless for unbound wq. + + WQ_HIGHPRI | WQ_CPU_INTENSIVE + + This combination makes the wq avoid interaction with + concurrency management completely and behave as a simple + per-CPU execution context provider. Work items queued on a + highpri CPU-intensive wq start execution as soon as resources + are available and don't affect execution of other work items. + +@max_active: + +@max_active determines the maximum number of execution contexts per +CPU which can be assigned to the work items of a wq. For example, +with @max_active of 16, at most 16 work items of the wq can be +executing at the same time per CPU. + +Currently, for a bound wq, the maximum limit for @max_active is 512 +and the default value used when 0 is specified is 256. For an unbound +wq, the limit is the higher of 512 and 4 * num_possible_cpus(). These +values are chosen sufficiently high such that they are not the +limiting factor while providing protection in runaway cases. + +The number of active work items of a wq is usually regulated by the +users of the wq, more specifically, by how many work items the users +may queue at the same time. Unless there is a specific need for +throttling the number of active work items, specifying '0' is +recommended. + +Some users depend on the strict execution ordering of ST wq. The +combination of @max_active of 1 and WQ_UNBOUND is used to achieve this +behavior. Work items on such a wq are always queued to the unbound gcwq +and only one work item can be active at any given time thus achieving +the same ordering property as ST wq. + + +5. Example Execution Scenarios + +The following example execution scenarios try to illustrate how cmwq +behaves under different configurations. + + Work items w0, w1, w2 are queued to a bound wq q0 on the same CPU.
+ w0 burns CPU for 5ms then sleeps for 10ms then burns CPU for 5ms + again before finishing. w1 and w2 burn CPU for 5ms then sleep for + 10ms. + +Ignoring all other tasks, works and processing overhead, and assuming +simple FIFO scheduling, the following is one highly simplified version +of possible sequences of events with the original wq. + + TIME IN MSECS EVENT + 0 w0 starts and burns CPU + 5 w0 sleeps + 15 w0 wakes up and burns CPU + 20 w0 finishes + 20 w1 starts and burns CPU + 25 w1 sleeps + 35 w1 wakes up and finishes + 35 w2 starts and burns CPU + 40 w2 sleeps + 50 w2 wakes up and finishes + +And with cmwq with @max_active >= 3, + + TIME IN MSECS EVENT + 0 w0 starts and burns CPU + 5 w0 sleeps + 5 w1 starts and burns CPU + 10 w1 sleeps + 10 w2 starts and burns CPU + 15 w2 sleeps + 15 w0 wakes up and burns CPU + 20 w0 finishes + 20 w1 wakes up and finishes + 25 w2 wakes up and finishes + +If @max_active == 2, + + TIME IN MSECS EVENT + 0 w0 starts and burns CPU + 5 w0 sleeps + 5 w1 starts and burns CPU + 10 w1 sleeps + 15 w0 wakes up and burns CPU + 20 w0 finishes + 20 w1 wakes up and finishes + 20 w2 starts and burns CPU + 25 w2 sleeps + 35 w2 wakes up and finishes + +Now, let's assume w1 and w2 are queued to a different wq q1 which has +WQ_HIGHPRI set, + + TIME IN MSECS EVENT + 0 w1 and w2 start and burn CPU + 5 w1 sleeps + 10 w2 sleeps + 10 w0 starts and burns CPU + 15 w0 sleeps + 15 w1 wakes up and finishes + 20 w2 wakes up and finishes + 25 w0 wakes up and burns CPU + 30 w0 finishes + +If q1 has WQ_CPU_INTENSIVE set, + + TIME IN MSECS EVENT + 0 w0 starts and burns CPU + 5 w0 sleeps + 5 w1 and w2 start and burn CPU + 10 w1 sleeps + 15 w2 sleeps + 15 w0 wakes up and burns CPU + 20 w0 finishes + 20 w1 wakes up and finishes + 25 w2 wakes up and finishes + + +6. Guidelines + +* Do not forget to use WQ_RESCUER if a wq may process work items which + are used during memory reclaim. Each wq with WQ_RESCUER set has one + rescuer thread reserved for it. If there is dependency among + multiple work items used during memory reclaim, they should be + queued to separate wq each with WQ_RESCUER. + +* Unless strict ordering is required, there is no need to use ST wq. + +* Unless there is a specific need, using 0 for @max_active is + recommended. In most use cases, concurrency level usually stays + well under the default limit. + +* A wq serves as a domain for forward progress guarantee (WQ_RESCUER), + flush and work item attributes. Work items which are not involved + in memory reclaim and don't need to be flushed as a part of a group + of work items, and don't require any special attribute, can use one + of the system wq. There is no difference in execution + characteristics between using a dedicated wq and a system wq. + +* Unless work items are expected to consume a huge amount of CPU + cycles, using a bound wq is usually beneficial due to the increased + level of locality in wq operations and work item execution. 
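Putting the guidelines together, here is a hedged sketch of setup code for a driver whose work items must make progress under memory reclaim; the names are illustrative, not taken from any real driver:

	#include <linux/errno.h>
	#include <linux/workqueue.h>

	static struct workqueue_struct *reclaim_wq;

	static int my_driver_setup(void)
	{
		/* WQ_RESCUER reserves a rescuer thread so that queued work
		 * can still execute when worker creation stalls under
		 * memory pressure; @max_active is kept small since the
		 * driver queues few items at a time. */
		reclaim_wq = alloc_workqueue("my_reclaim_wq", WQ_RESCUER, 1);
		return reclaim_wq ? 0 : -ENOMEM;
	}

Work items that have no reclaim or ordering requirements would instead just use schedule_work() and the system wq, per the guidelines above.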
diff --git a/MAINTAINERS b/MAINTAINERS index c36f5d7..411b0d0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1135,7 +1135,7 @@ ATLX ETHERNET DRIVERS M: Jay Cliburn M: Chris Snook M: Jie Yang -L: atl1-devel@lists.sourceforge.net +L: netdev@vger.kernel.org W: http://sourceforge.net/projects/atl1 W: http://atl1.sourceforge.net S: Maintained @@ -1445,6 +1445,16 @@ S: Maintained F: Documentation/video4linux/cafe_ccic F: drivers/media/video/cafe_ccic* +CAIF NETWORK LAYER +M: Sjur Braendeland +L: netdev@vger.kernel.org +S: Supported +F: Documentation/networking/caif/ +F: drivers/net/caif/ +F: include/linux/caif/ +F: include/net/caif/ +F: net/caif/ + CALGARY x86-64 IOMMU M: Muli Ben-Yehuda M: "Jon D. Mason" @@ -2201,6 +2211,12 @@ L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/hw/ehca/ +EHEA (IBM pSeries eHEA 10Gb ethernet adapter) DRIVER +M: Breno Leitao +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/ehea/ + EMBEDDED LINUX M: Paul Gortmaker M: Matt Mackall @@ -2641,9 +2657,12 @@ S: Maintained F: drivers/media/video/gspca/ HARDWARE MONITORING +M: Jean Delvare +M: Guenter Roeck L: lm-sensors@lm-sensors.org W: http://www.lm-sensors.org/ -S: Orphan +T: quilt kernel.org/pub/linux/kernel/people/jdelvare/linux-2.6/jdelvare-hwmon/ +S: Maintained F: Documentation/hwmon/ F: drivers/hwmon/ F: include/linux/hwmon*.h @@ -2781,11 +2800,6 @@ S: Maintained F: arch/x86/kernel/hpet.c F: arch/x86/include/asm/hpet.h -HPET: ACPI -M: Bob Picco -S: Maintained -F: drivers/char/hpet.c - HPFS FILESYSTEM M: Mikulas Patocka W: http://artax.karlin.mff.cuni.cz/~mikulas/vyplody/hpfs/index-e.cgi @@ -3398,7 +3412,7 @@ F: drivers/s390/kvm/ KEXEC M: Eric Biederman -W: http://ftp.kernel.org/pub/linux/kernel/people/horms/kexec-tools/ +W: http://kernel.org/pub/linux/utils/kernel/kexec/ L: kexec@lists.infradead.org S: Maintained F: include/linux/kexec.h @@ -3923,8 +3937,7 @@ F: Documentation/sound/oss/MultiSound F: sound/oss/msnd* MULTITECH MULTIPORT CARD (ISICOM) -M: Jiri Slaby -S: Maintained +S: Orphan F: drivers/char/isicom.c F: include/linux/isicom.h @@ -4604,7 +4617,7 @@ F: include/linux/preempt.h PRISM54 WIRELESS DRIVER M: "Luis R. Rodriguez" L: linux-wireless@vger.kernel.org -W: http://prism54.org +W: http://wireless.kernel.org/en/users/Drivers/p54 S: Obsolete F: drivers/net/wireless/prism54/ @@ -4805,6 +4818,7 @@ RCUTORTURE MODULE M: Josh Triplett M: "Paul E. McKenney" S: Supported +T: git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-2.6-rcu.git F: Documentation/RCU/torture.txt F: kernel/rcutorture.c @@ -4829,6 +4843,7 @@ M: Dipankar Sarma M: "Paul E. 
McKenney" W: http://www.rdrop.com/users/paulmck/rclock/ S: Supported +T: git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-2.6-rcu.git F: Documentation/RCU/ F: include/linux/rcu* F: include/linux/srcu* @@ -4836,12 +4851,10 @@ F: kernel/rcu* F: kernel/srcu* X: kernel/rcutorture.c -REAL TIME CLOCK DRIVER +REAL TIME CLOCK DRIVER (LEGACY) M: Paul Gortmaker S: Maintained -F: Documentation/rtc.txt -F: drivers/rtc/ -F: include/linux/rtc.h +F: drivers/char/rtc.c REAL TIME CLOCK (RTC) SUBSYSTEM M: Alessandro Zummo diff --git a/Makefile b/Makefile index 4df9873..53cd4aa 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 36 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4-stor15 NAME = Sheep on Meth # *DOCUMENTATION* diff --git a/arch/alpha/include/asm/cache.h b/arch/alpha/include/asm/cache.h index f199e69..ad368a9 100644 --- a/arch/alpha/include/asm/cache.h +++ b/arch/alpha/include/asm/cache.h @@ -17,7 +17,6 @@ # define L1_CACHE_SHIFT 5 #endif -#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1)) #define SMP_CACHE_BYTES L1_CACHE_BYTES #endif diff --git a/arch/alpha/include/asm/cacheflush.h b/arch/alpha/include/asm/cacheflush.h index 01d71e1..012f124 100644 --- a/arch/alpha/include/asm/cacheflush.h +++ b/arch/alpha/include/asm/cacheflush.h @@ -43,6 +43,8 @@ extern void smp_imb(void); /* ??? Ought to use this in arch/alpha/kernel/signal.c too. */ #ifndef CONFIG_SMP +#include + extern void __load_new_mm_context(struct mm_struct *); static inline void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index 804e531..058937b 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h @@ -449,10 +449,13 @@ #define __NR_pwritev 491 #define __NR_rt_tgsigqueueinfo 492 #define __NR_perf_event_open 493 +#define __NR_fanotify_init 494 +#define __NR_fanotify_mark 495 +#define __NR_prlimit64 496 #ifdef __KERNEL__ -#define NR_SYSCALLS 494 +#define NR_SYSCALLS 497 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR @@ -463,6 +466,7 @@ #define __ARCH_WANT_SYS_OLD_GETRLIMIT #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING +#define __ARCH_WANT_SYS_RT_SIGSUSPEND /* "Conditional" syscalls. What we want is diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index b45d913..ab1ee0a 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -317,14 +317,14 @@ ret_from_sys_call: ldq $0, SP_OFF($sp) and $0, 8, $0 beq $0, restore_all -ret_from_reschedule: +ret_to_user: /* Make sure need_resched and sigpending don't change between sampling and the rti. */ lda $16, 7 call_pal PAL_swpipl ldl $5, TI_FLAGS($8) and $5, _TIF_WORK_MASK, $2 - bne $5, work_pending + bne $2, work_pending restore_all: RESTORE_ALL call_pal PAL_rti @@ -363,7 +363,7 @@ $ret_success: * $8: current. * $19: The old syscall number, or zero if this is not a return * from a syscall that errored and is possibly restartable. - * $20: Error indication. 
+ * $20: The old a3 value */ .align 4 @@ -392,12 +392,18 @@ $work_resched: $work_notifysig: mov $sp, $16 - br $1, do_switch_stack + bsr $1, do_switch_stack mov $sp, $17 mov $5, $18 + mov $19, $9 /* save old syscall number */ + mov $20, $10 /* save old a3 */ + and $5, _TIF_SIGPENDING, $2 + cmovne $2, 0, $9 /* we don't want double syscall restarts */ jsr $26, do_notify_resume + mov $9, $19 + mov $10, $20 bsr $1, undo_switch_stack - br restore_all + br ret_to_user .end work_pending /* @@ -430,6 +436,7 @@ strace: beq $1, 1f ldq $27, 0($2) 1: jsr $26, ($27), sys_gettimeofday +ret_from_straced: ldgp $gp, 0($26) /* check return.. */ @@ -757,11 +764,15 @@ sys_vfork: .ent sys_sigreturn sys_sigreturn: .prologue 0 + lda $9, ret_from_straced + cmpult $26, $9, $9 mov $sp, $17 lda $18, -SWITCH_STACK_SIZE($sp) lda $sp, -SWITCH_STACK_SIZE($sp) jsr $26, do_sigreturn - br $1, undo_switch_stack + bne $9, 1f + jsr $26, syscall_trace +1: br $1, undo_switch_stack br ret_from_sys_call .end sys_sigreturn @@ -770,47 +781,19 @@ sys_sigreturn: .ent sys_rt_sigreturn sys_rt_sigreturn: .prologue 0 + lda $9, ret_from_straced + cmpult $26, $9, $9 mov $sp, $17 lda $18, -SWITCH_STACK_SIZE($sp) lda $sp, -SWITCH_STACK_SIZE($sp) jsr $26, do_rt_sigreturn - br $1, undo_switch_stack + bne $9, 1f + jsr $26, syscall_trace +1: br $1, undo_switch_stack br ret_from_sys_call .end sys_rt_sigreturn .align 4 - .globl sys_sigsuspend - .ent sys_sigsuspend -sys_sigsuspend: - .prologue 0 - mov $sp, $17 - br $1, do_switch_stack - mov $sp, $18 - subq $sp, 16, $sp - stq $26, 0($sp) - jsr $26, do_sigsuspend - ldq $26, 0($sp) - lda $sp, SWITCH_STACK_SIZE+16($sp) - ret -.end sys_sigsuspend - - .align 4 - .globl sys_rt_sigsuspend - .ent sys_rt_sigsuspend -sys_rt_sigsuspend: - .prologue 0 - mov $sp, $18 - br $1, do_switch_stack - mov $sp, $19 - subq $sp, 16, $sp - stq $26, 0($sp) - jsr $26, do_rt_sigsuspend - ldq $26, 0($sp) - lda $sp, SWITCH_STACK_SIZE+16($sp) - ret -.end sys_rt_sigsuspend - - .align 4 .globl sys_sethae .ent sys_sethae sys_sethae: diff --git a/arch/alpha/kernel/err_ev6.c b/arch/alpha/kernel/err_ev6.c index 8ca6345..253cf1a 100644 --- a/arch/alpha/kernel/err_ev6.c +++ b/arch/alpha/kernel/err_ev6.c @@ -90,11 +90,13 @@ static int ev6_parse_cbox(u64 c_addr, u64 c1_syn, u64 c2_syn, u64 c_stat, u64 c_sts, int print) { - char *sourcename[] = { "UNKNOWN", "UNKNOWN", "UNKNOWN", - "MEMORY", "BCACHE", "DCACHE", - "BCACHE PROBE", "BCACHE PROBE" }; - char *streamname[] = { "D", "I" }; - char *bitsname[] = { "SINGLE", "DOUBLE" }; + static const char * const sourcename[] = { + "UNKNOWN", "UNKNOWN", "UNKNOWN", + "MEMORY", "BCACHE", "DCACHE", + "BCACHE PROBE", "BCACHE PROBE" + }; + static const char * const streamname[] = { "D", "I" }; + static const char * const bitsname[] = { "SINGLE", "DOUBLE" }; int status = MCHK_DISPOSITION_REPORT; int source = -1, stream = -1, bits = -1; diff --git a/arch/alpha/kernel/err_marvel.c b/arch/alpha/kernel/err_marvel.c index 52a79df..648ae88 100644 --- a/arch/alpha/kernel/err_marvel.c +++ b/arch/alpha/kernel/err_marvel.c @@ -109,7 +109,7 @@ marvel_print_err_cyc(u64 err_cyc) #define IO7__ERR_CYC__CYCLE__M (0x7) printk("%s Packet In Error: %s\n" - "%s Error in %s, cycle %ld%s%s\n", + "%s Error in %s, cycle %lld%s%s\n", err_print_prefix, packet_desc[EXTRACT(err_cyc, IO7__ERR_CYC__PACKET)], err_print_prefix, @@ -313,7 +313,7 @@ marvel_print_po7_ugbge_sym(u64 ugbge_sym) } printk("%s Up Hose Garbage Symptom:\n" - "%s Source Port: %ld - Dest PID: %ld - OpCode: %s\n", + "%s Source Port: %lld - Dest PID: %lld - OpCode: 
%s\n", err_print_prefix, err_print_prefix, EXTRACT(ugbge_sym, IO7__PO7_UGBGE_SYM__UPH_SRC_PORT), @@ -552,7 +552,7 @@ marvel_print_pox_spl_cmplt(u64 spl_cmplt) #define IO7__POX_SPLCMPLT__REM_BYTE_COUNT__M (0xfff) printk("%s Split Completion Error:\n" - "%s Source (Bus:Dev:Func): %ld:%ld:%ld\n", + "%s Source (Bus:Dev:Func): %lld:%lld:%lld\n", err_print_prefix, err_print_prefix, EXTRACT(spl_cmplt, IO7__POX_SPLCMPLT__SOURCE_BUS), @@ -589,22 +589,23 @@ marvel_print_pox_spl_cmplt(u64 spl_cmplt) static void marvel_print_pox_trans_sum(u64 trans_sum) { - char *pcix_cmd[] = { "Interrupt Acknowledge", - "Special Cycle", - "I/O Read", - "I/O Write", - "Reserved", - "Reserved / Device ID Message", - "Memory Read", - "Memory Write", - "Reserved / Alias to Memory Read Block", - "Reserved / Alias to Memory Write Block", - "Configuration Read", - "Configuration Write", - "Memory Read Multiple / Split Completion", - "Dual Address Cycle", - "Memory Read Line / Memory Read Block", - "Memory Write and Invalidate / Memory Write Block" + static const char * const pcix_cmd[] = { + "Interrupt Acknowledge", + "Special Cycle", + "I/O Read", + "I/O Write", + "Reserved", + "Reserved / Device ID Message", + "Memory Read", + "Memory Write", + "Reserved / Alias to Memory Read Block", + "Reserved / Alias to Memory Write Block", + "Configuration Read", + "Configuration Write", + "Memory Read Multiple / Split Completion", + "Dual Address Cycle", + "Memory Read Line / Memory Read Block", + "Memory Write and Invalidate / Memory Write Block" }; #define IO7__POX_TRANSUM__PCI_ADDR__S (0) diff --git a/arch/alpha/kernel/err_titan.c b/arch/alpha/kernel/err_titan.c index f7ed97c..c3b3781 100644 --- a/arch/alpha/kernel/err_titan.c +++ b/arch/alpha/kernel/err_titan.c @@ -75,8 +75,12 @@ titan_parse_p_serror(int which, u64 serror, int print) int status = MCHK_DISPOSITION_REPORT; #ifdef CONFIG_VERBOSE_MCHECK - char *serror_src[] = {"GPCI", "APCI", "AGP HP", "AGP LP"}; - char *serror_cmd[] = {"DMA Read", "DMA RMW", "SGTE Read", "Reserved"}; + static const char * const serror_src[] = { + "GPCI", "APCI", "AGP HP", "AGP LP" + }; + static const char * const serror_cmd[] = { + "DMA Read", "DMA RMW", "SGTE Read", "Reserved" + }; #endif /* CONFIG_VERBOSE_MCHECK */ #define TITAN__PCHIP_SERROR__LOST_UECC (1UL << 0) @@ -140,14 +144,15 @@ titan_parse_p_perror(int which, int port, u64 perror, int print) int status = MCHK_DISPOSITION_REPORT; #ifdef CONFIG_VERBOSE_MCHECK - char *perror_cmd[] = { "Interrupt Acknowledge", "Special Cycle", - "I/O Read", "I/O Write", - "Reserved", "Reserved", - "Memory Read", "Memory Write", - "Reserved", "Reserved", - "Configuration Read", "Configuration Write", - "Memory Read Multiple", "Dual Address Cycle", - "Memory Read Line","Memory Write and Invalidate" + static const char * const perror_cmd[] = { + "Interrupt Acknowledge", "Special Cycle", + "I/O Read", "I/O Write", + "Reserved", "Reserved", + "Memory Read", "Memory Write", + "Reserved", "Reserved", + "Configuration Read", "Configuration Write", + "Memory Read Multiple", "Dual Address Cycle", + "Memory Read Line", "Memory Write and Invalidate" }; #endif /* CONFIG_VERBOSE_MCHECK */ @@ -273,11 +278,11 @@ titan_parse_p_agperror(int which, u64 agperror, int print) int cmd, len; unsigned long addr; - char *agperror_cmd[] = { "Read (low-priority)", "Read (high-priority)", - "Write (low-priority)", - "Write (high-priority)", - "Reserved", "Reserved", - "Flush", "Fence" + static const char * const agperror_cmd[] = { + "Read (low-priority)", "Read (high-priority)", + "Write 
(low-priority)", "Write (high-priority)", + "Reserved", "Reserved", + "Flush", "Fence" }; #endif /* CONFIG_VERBOSE_MCHECK */ diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 5d1e6d6..547e8b8 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -69,7 +68,6 @@ SYSCALL_DEFINE4(osf_set_program_attributes, unsigned long, text_start, { struct mm_struct *mm; - lock_kernel(); mm = current->mm; mm->end_code = bss_start + bss_len; mm->start_brk = bss_start + bss_len; @@ -78,7 +76,6 @@ SYSCALL_DEFINE4(osf_set_program_attributes, unsigned long, text_start, printk("set_program_attributes(%lx %lx %lx %lx)\n", text_start, text_len, bss_start, bss_len); #endif - unlock_kernel(); return 0; } @@ -517,7 +514,6 @@ SYSCALL_DEFINE2(osf_proplist_syscall, enum pl_code, code, long error; int __user *min_buf_size_ptr; - lock_kernel(); switch (code) { case PL_SET: if (get_user(error, &args->set.nbytes)) @@ -547,7 +543,6 @@ SYSCALL_DEFINE2(osf_proplist_syscall, enum pl_code, code, error = -EOPNOTSUPP; break; }; - unlock_kernel(); return error; } @@ -594,7 +589,7 @@ SYSCALL_DEFINE2(osf_sigstack, struct sigstack __user *, uss, SYSCALL_DEFINE3(osf_sysinfo, int, command, char __user *, buf, long, count) { - char *sysinfo_table[] = { + const char *sysinfo_table[] = { utsname()->sysname, utsname()->nodename, utsname()->release, @@ -606,7 +601,7 @@ SYSCALL_DEFINE3(osf_sysinfo, int, command, char __user *, buf, long, count) "dummy", /* secure RPC domain */ }; unsigned long offset; - char *res; + const char *res; long len, err = -EINVAL; offset = command-1; diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c index 738fc82..b899e95 100644 --- a/arch/alpha/kernel/pci-sysfs.c +++ b/arch/alpha/kernel/pci-sysfs.c @@ -66,7 +66,7 @@ static int pci_mmap_resource(struct kobject *kobj, { struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj)); - struct resource *res = (struct resource *)attr->private; + struct resource *res = attr->private; enum pci_mmap_state mmap_type; struct pci_bus_region bar; int i; diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 51c39fa..85d8e4f 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -241,20 +241,20 @@ static inline unsigned long alpha_read_pmc(int idx) static int alpha_perf_event_set_period(struct perf_event *event, struct hw_perf_event *hwc, int idx) { - long left = atomic64_read(&hwc->period_left); + long left = local64_read(&hwc->period_left); long period = hwc->sample_period; int ret = 0; if (unlikely(left <= -period)) { left = period; - atomic64_set(&hwc->period_left, left); + local64_set(&hwc->period_left, left); hwc->last_period = period; ret = 1; } if (unlikely(left <= 0)) { left += period; - atomic64_set(&hwc->period_left, left); + local64_set(&hwc->period_left, left); hwc->last_period = period; ret = 1; } @@ -269,7 +269,7 @@ static int alpha_perf_event_set_period(struct perf_event *event, if (left > (long)alpha_pmu->pmc_max_period[idx]) left = alpha_pmu->pmc_max_period[idx]; - atomic64_set(&hwc->prev_count, (unsigned long)(-left)); + local64_set(&hwc->prev_count, (unsigned long)(-left)); alpha_write_pmc(idx, (unsigned long)(-left)); @@ -300,10 +300,10 @@ static unsigned long alpha_perf_event_update(struct perf_event *event, long delta; again: - prev_raw_count = atomic64_read(&hwc->prev_count); + prev_raw_count = local64_read(&hwc->prev_count); 
new_raw_count = alpha_read_pmc(idx); - if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, new_raw_count) != prev_raw_count) goto again; @@ -316,8 +316,8 @@ again: delta += alpha_pmu->pmc_max_period[idx] + 1; } - atomic64_add(delta, &event->count); - atomic64_sub(delta, &hwc->period_left); + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); return new_raw_count; } @@ -636,7 +636,7 @@ static int __hw_perf_event_init(struct perf_event *event) if (!hwc->sample_period) { hwc->sample_period = alpha_pmu->pmc_max_period[0]; hwc->last_period = hwc->sample_period; - atomic64_set(&hwc->period_left, hwc->sample_period); + local64_set(&hwc->period_left, hwc->sample_period); } return 0; diff --git a/arch/alpha/kernel/proto.h b/arch/alpha/kernel/proto.h index 3d2627e..d3e52d3 100644 --- a/arch/alpha/kernel/proto.h +++ b/arch/alpha/kernel/proto.h @@ -156,9 +156,6 @@ extern void SMC669_Init(int); /* es1888.c */ extern void es1888_init(void); -/* ns87312.c */ -extern void ns87312_enable_ide(long ide_base); - /* ../lib/fpreg.c */ extern void alpha_write_fp_reg (unsigned long reg, unsigned long val); extern unsigned long alpha_read_fp_reg (unsigned long reg); diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index 0932dbb..0f6b51a 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -144,8 +144,7 @@ SYSCALL_DEFINE5(rt_sigaction, int, sig, const struct sigaction __user *, act, /* * Atomically swap in the new signal mask, and wait for a signal. */ -asmlinkage int -do_sigsuspend(old_sigset_t mask, struct pt_regs *regs, struct switch_stack *sw) +SYSCALL_DEFINE1(sigsuspend, old_sigset_t, mask) { mask &= _BLOCKABLE; spin_lock_irq(¤t->sighand->siglock); @@ -154,41 +153,6 @@ do_sigsuspend(old_sigset_t mask, struct pt_regs *regs, struct switch_stack *sw) recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - /* Indicate EINTR on return from any possible signal handler, - which will not come back through here, but via sigreturn. */ - regs->r0 = EINTR; - regs->r19 = 1; - - current->state = TASK_INTERRUPTIBLE; - schedule(); - set_thread_flag(TIF_RESTORE_SIGMASK); - return -ERESTARTNOHAND; -} - -asmlinkage int -do_rt_sigsuspend(sigset_t __user *uset, size_t sigsetsize, - struct pt_regs *regs, struct switch_stack *sw) -{ - sigset_t set; - - /* XXX: Don't preclude handling different sized sigset_t's. */ - if (sigsetsize != sizeof(sigset_t)) - return -EINVAL; - if (copy_from_user(&set, uset, sizeof(set))) - return -EFAULT; - - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->saved_sigmask = current->blocked; - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - - /* Indicate EINTR on return from any possible signal handler, - which will not come back through here, but via sigreturn. 
*/ - regs->r0 = EINTR; - regs->r19 = 1; - current->state = TASK_INTERRUPTIBLE; schedule(); set_thread_flag(TIF_RESTORE_SIGMASK); @@ -239,6 +203,8 @@ restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned long usp; long i, err = __get_user(regs->pc, &sc->sc_pc); + current_thread_info()->restart_block.fn = do_no_restart_syscall; + sw->r26 = (unsigned long) ret_from_sys_call; err |= __get_user(regs->r0, sc->sc_regs+0); @@ -591,7 +557,6 @@ syscall_restart(unsigned long r0, unsigned long r19, regs->pc -= 4; break; case ERESTART_RESTARTBLOCK: - current_thread_info()->restart_block.fn = do_no_restart_syscall; regs->r0 = EINTR; break; } diff --git a/arch/alpha/kernel/srm_env.c b/arch/alpha/kernel/srm_env.c index 4afc1a1..f0df3fb 100644 --- a/arch/alpha/kernel/srm_env.c +++ b/arch/alpha/kernel/srm_env.c @@ -87,7 +87,7 @@ static int srm_env_proc_show(struct seq_file *m, void *v) srm_env_t *entry; char *page; - entry = (srm_env_t *)m->private; + entry = m->private; page = (char *)__get_free_page(GFP_USER); if (!page) return -ENOMEM; diff --git a/arch/alpha/kernel/sys_cabriolet.c b/arch/alpha/kernel/sys_cabriolet.c index affd0f3..14c8898 100644 --- a/arch/alpha/kernel/sys_cabriolet.c +++ b/arch/alpha/kernel/sys_cabriolet.c @@ -33,7 +33,7 @@ #include "irq_impl.h" #include "pci_impl.h" #include "machvec_impl.h" - +#include "pc873xx.h" /* Note mask bit is true for DISABLED irqs. */ static unsigned long cached_irq_mask = ~0UL; @@ -236,17 +236,30 @@ cabriolet_map_irq(struct pci_dev *dev, u8 slot, u8 pin) } static inline void __init +cabriolet_enable_ide(void) +{ + if (pc873xx_probe() == -1) { + printk(KERN_ERR "Probing for PC873xx Super IO chip failed.\n"); + } else { + printk(KERN_INFO "Found %s Super IO chip at 0x%x\n", + pc873xx_get_model(), pc873xx_get_base()); + + pc873xx_enable_ide(); + } +} + +static inline void __init cabriolet_init_pci(void) { common_init_pci(); - ns87312_enable_ide(0x398); + cabriolet_enable_ide(); } static inline void __init cia_cab_init_pci(void) { cia_init_pci(); - ns87312_enable_ide(0x398); + cabriolet_enable_ide(); } /* diff --git a/arch/alpha/kernel/sys_takara.c b/arch/alpha/kernel/sys_takara.c index 2304648..4da596b 100644 --- a/arch/alpha/kernel/sys_takara.c +++ b/arch/alpha/kernel/sys_takara.c @@ -29,7 +29,7 @@ #include "irq_impl.h" #include "pci_impl.h" #include "machvec_impl.h" - +#include "pc873xx.h" /* Note mask bit is true for DISABLED irqs. */ static unsigned long cached_irq_mask[2] = { -1, -1 }; @@ -264,7 +264,14 @@ takara_init_pci(void) alpha_mv.pci_map_irq = takara_map_irq_srm; cia_init_pci(); - ns87312_enable_ide(0x26e); + + if (pc873xx_probe() == -1) { + printk(KERN_ERR "Probing for PC873xx Super IO chip failed.\n"); + } else { + printk(KERN_INFO "Found %s Super IO chip at 0x%x\n", + pc873xx_get_model(), pc873xx_get_base()); + pc873xx_enable_ide(); + } } diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index 09acb78..ce594ef 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -512,6 +512,9 @@ sys_call_table: .quad sys_pwritev .quad sys_rt_tgsigqueueinfo .quad sys_perf_event_open + .quad sys_fanotify_init + .quad sys_fanotify_mark /* 495 */ + .quad sys_prlimit64 .size sys_call_table, . 
- sys_call_table .type sys_call_table, @object diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index eacceb2..396af17 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -191,16 +191,16 @@ irqreturn_t timer_interrupt(int irq, void *dev) write_sequnlock(&xtime_lock); -#ifndef CONFIG_SMP - while (nticks--) - update_process_times(user_mode(get_irq_regs())); -#endif - if (test_perf_event_pending()) { clear_perf_event_pending(); perf_event_do_pending(); } +#ifndef CONFIG_SMP + while (nticks--) + update_process_times(user_mode(get_irq_regs())); +#endif + return IRQ_HANDLED; } diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index b14f015..0414e02 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -623,7 +622,6 @@ do_entUna(void * va, unsigned long opcode, unsigned long reg, return; } - lock_kernel(); printk("Bad unaligned kernel access at %016lx: %p %lx %lu\n", pc, va, opcode, reg); do_exit(SIGSEGV); @@ -646,7 +644,6 @@ got_exception: * Yikes! No one to forward the exception to. * Since the registers are in a weird format, dump them ourselves. */ - lock_kernel(); printk("%s(%d): unhandled unaligned exception\n", current->comm, task_pid_nr(current)); diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a7ed21f..553b7cf 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1576,96 +1576,6 @@ config AUTO_ZRELADDR 0xf8000000. This assumes the zImage being placed in the first 128MB from start of memory. -config ZRELADDR - hex "Physical address of the decompressed kernel image" - depends on !AUTO_ZRELADDR - default 0x00008000 if ARCH_BCMRING ||\ - ARCH_CNS3XXX ||\ - ARCH_DOVE ||\ - ARCH_EBSA110 ||\ - ARCH_FOOTBRIDGE ||\ - ARCH_INTEGRATOR ||\ - ARCH_IOP13XX ||\ - ARCH_IOP33X ||\ - ARCH_IXP2000 ||\ - ARCH_IXP23XX ||\ - ARCH_IXP4XX ||\ - ARCH_KIRKWOOD ||\ - ARCH_KS8695 ||\ - ARCH_LOKI ||\ - ARCH_MMP ||\ - ARCH_MV78XX0 ||\ - ARCH_NOMADIK ||\ - ARCH_NUC93X ||\ - ARCH_NS9XXX ||\ - ARCH_ORION5X ||\ - ARCH_SPEAR3XX ||\ - ARCH_SPEAR6XX ||\ - ARCH_U8500 ||\ - ARCH_VERSATILE ||\ - ARCH_W90X900 - default 0x08008000 if ARCH_MX1 ||\ - ARCH_SHARK - default 0x10008000 if ARCH_MSM ||\ - ARCH_OMAP1 ||\ - ARCH_RPC - default 0x20008000 if ARCH_S5P6440 ||\ - ARCH_S5P6442 ||\ - ARCH_S5PC100 ||\ - ARCH_S5PV210 - default 0x30008000 if ARCH_S3C2410 ||\ - ARCH_S3C2400 ||\ - ARCH_S3C2412 ||\ - ARCH_S3C2416 ||\ - ARCH_S3C2440 ||\ - ARCH_S3C2443 - default 0x40008000 if ARCH_STMP378X ||\ - ARCH_STMP37XX ||\ - ARCH_SH7372 ||\ - ARCH_SH7377 ||\ - ARCH_S5PV310 - default 0x50008000 if ARCH_S3C64XX ||\ - ARCH_SH7367 - default 0x60008000 if ARCH_VEXPRESS - default 0x80008000 if ARCH_MX25 ||\ - ARCH_MX3 ||\ - ARCH_NETX ||\ - ARCH_OMAP2PLUS ||\ - ARCH_PNX4008 - default 0x90008000 if ARCH_MX5 ||\ - ARCH_MX91231 - default 0xa0008000 if ARCH_IOP32X ||\ - ARCH_PXA ||\ - MACH_MX27 - default 0xc0008000 if ARCH_LH7A40X ||\ - MACH_MX21 - default 0xf0008000 if ARCH_AAEC2000 ||\ - ARCH_L7200 - default 0xc0028000 if ARCH_CLPS711X - default 0x70008000 if ARCH_AT91 && (ARCH_AT91CAP9 || ARCH_AT91SAM9G45) - default 0x20008000 if ARCH_AT91 && !(ARCH_AT91CAP9 || ARCH_AT91SAM9G45) - default 0xc0008000 if ARCH_DAVINCI && ARCH_DAVINCI_DA8XX - default 0x80008000 if ARCH_DAVINCI && !ARCH_DAVINCI_DA8XX - default 0x00008000 if ARCH_EP93XX && EP93XX_SDCE3_SYNC_PHYS_OFFSET - default 0xc0008000 if ARCH_EP93XX && EP93XX_SDCE0_PHYS_OFFSET - default 0xd0008000 if ARCH_EP93XX && EP93XX_SDCE1_PHYS_OFFSET - 
default 0xe0008000 if ARCH_EP93XX && EP93XX_SDCE2_PHYS_OFFSET - default 0xf0008000 if ARCH_EP93XX && EP93XX_SDCE3_ASYNC_PHYS_OFFSET - default 0x00008000 if ARCH_GEMINI && GEMINI_MEM_SWAP - default 0x10008000 if ARCH_GEMINI && !GEMINI_MEM_SWAP - default 0x70008000 if ARCH_REALVIEW && REALVIEW_HIGH_PHYS_OFFSET - default 0x00008000 if ARCH_REALVIEW && !REALVIEW_HIGH_PHYS_OFFSET - default 0xc0208000 if ARCH_SA1100 && SA1111 - default 0xc0008000 if ARCH_SA1100 && !SA1111 - default 0x30108000 if ARCH_S3C2410 && PM_H1940 - default 0x28E08000 if ARCH_U300 && MACH_U300_SINGLE_RAM - default 0x48008000 if ARCH_U300 && !MACH_U300_SINGLE_RAM - help - ZRELADDR is the physical address where the decompressed kernel - image will be placed. ZRELADDR has to be specified when the - assumption of AUTO_ZRELADDR is not valid, or when ZBOOT_ROM is - selected. - endmenu menu "CPU Power Management" diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile index f705213..4a590f4 100644 --- a/arch/arm/boot/Makefile +++ b/arch/arm/boot/Makefile @@ -14,16 +14,18 @@ MKIMAGE := $(srctree)/scripts/mkuboot.sh ifneq ($(MACHINE),) --include $(srctree)/$(MACHINE)/Makefile.boot +include $(srctree)/$(MACHINE)/Makefile.boot endif # Note: the following conditions must always be true: +# ZRELADDR == virt_to_phys(PAGE_OFFSET + TEXT_OFFSET) # PARAMS_PHYS must be within 4MB of ZRELADDR # INITRD_PHYS must be in RAM +ZRELADDR := $(zreladdr-y) PARAMS_PHYS := $(params_phys-y) INITRD_PHYS := $(initrd_phys-y) -export INITRD_PHYS PARAMS_PHYS +export ZRELADDR INITRD_PHYS PARAMS_PHYS targets := Image zImage xipImage bootpImage uImage @@ -65,7 +67,7 @@ quiet_cmd_uimage = UIMAGE $@ ifeq ($(CONFIG_ZBOOT_ROM),y) $(obj)/uImage: LOADADDR=$(CONFIG_ZBOOT_ROM_TEXT) else -$(obj)/uImage: LOADADDR=$(CONFIG_ZRELADDR) +$(obj)/uImage: LOADADDR=$(ZRELADDR) endif ifeq ($(CONFIG_THUMB2_KERNEL),y) diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 68775e3..b23f6bc 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -79,6 +79,10 @@ endif EXTRA_CFLAGS := -fpic -fno-builtin EXTRA_AFLAGS := -Wa,-march=all +# Supply ZRELADDR to the decompressor via a linker symbol. 
+ifneq ($(CONFIG_AUTO_ZRELADDR),y) +LDFLAGS_vmlinux := --defsym zreladdr=$(ZRELADDR) +endif ifeq ($(CONFIG_CPU_ENDIAN_BE8),y) LDFLAGS_vmlinux += --be8 endif diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 6af9907..6825c34 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -177,7 +177,7 @@ not_angel: and r4, pc, #0xf8000000 add r4, r4, #TEXT_OFFSET #else - ldr r4, =CONFIG_ZRELADDR + ldr r4, =zreladdr #endif subs r0, r0, r1 @ calculate the delta offset diff --git a/arch/arm/common/it8152.c b/arch/arm/common/it8152.c index 6c09135..7974baa 100644 --- a/arch/arm/common/it8152.c +++ b/arch/arm/common/it8152.c @@ -263,6 +263,14 @@ static int it8152_pci_platform_notify_remove(struct device *dev) return 0; } +int dma_needs_bounce(struct device *dev, dma_addr_t dma_addr, size_t size) +{ + dev_dbg(dev, "%s: dma_addr %08x, size %08x\n", + __func__, dma_addr, size); + return (dev->bus == &pci_bus_type) && + ((dma_addr + size - PHYS_OFFSET) >= SZ_64M); +} + int __init it8152_pci_setup(int nr, struct pci_sys_data *sys) { it8152_io.start = IT8152_IO_BASE + 0x12000; diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index c226fe1..c568da7 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -288,15 +288,7 @@ extern void dmabounce_unregister_dev(struct device *); * DMA access and 1 if the buffer needs to be bounced. * */ -#ifdef CONFIG_SA1111 extern int dma_needs_bounce(struct device*, dma_addr_t, size_t); -#else -static inline int dma_needs_bounce(struct device *dev, dma_addr_t addr, - size_t size) -{ - return 0; -} -#endif /* * The DMA API, implemented by dmabounce.c. See below for descriptions. diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index 48837e6..b5799a3 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h @@ -17,7 +17,7 @@ * counter interrupts are regular interrupts and not an NMI. This * means that when we receive the interrupt we can call * perf_event_do_pending() that handles all of the work with - * interrupts enabled. + * interrupts disabled. */ static inline void set_perf_event_pending(void) diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index d02cfb6..c891eb7 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -393,6 +393,9 @@ #define __NR_perf_event_open (__NR_SYSCALL_BASE+364) #define __NR_recvmmsg (__NR_SYSCALL_BASE+365) #define __NR_accept4 (__NR_SYSCALL_BASE+366) +#define __NR_fanotify_init (__NR_SYSCALL_BASE+367) +#define __NR_fanotify_mark (__NR_SYSCALL_BASE+368) +#define __NR_prlimit64 (__NR_SYSCALL_BASE+369) /* * The following SWIs are ARM private. 
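[Editor's note -- not part of the patch] The three syscall numbers added to arch/arm/include/asm/unistd.h above are only half of the wiring: they must be matched, in the same order, by the CALL() entries added to the ARM call table in the next hunk (arch/arm/kernel/calls.S). The same pairing applies to the alpha hunks earlier: unistd.h numbers 494-496 against the sys_call_table entries in systbls.S, with NR_SYSCALLS bumped to 497. A minimal userspace sketch that exercises the newly wired prlimit64 on such a kernel follows; the number 369 (ARM EABI), the compat struct, and the file name are assumptions for illustration, not part of the patch:

	/* prlimit64-demo.c -- sketch, assumes a kernel carrying the hunks above.
	 * The local struct definition stands in for libc headers that
	 * predate the syscall.
	 */
	#define _GNU_SOURCE
	#include <stdio.h>
	#include <sys/resource.h>
	#include <unistd.h>

	#ifndef __NR_prlimit64
	#define __NR_prlimit64 369	/* ARM EABI, per the hunk above */
	#endif

	struct rlimit64_compat {
		unsigned long long rlim_cur;
		unsigned long long rlim_max;
	};

	int main(void)
	{
		struct rlimit64_compat old;

		/* pid 0 targets the calling task; a NULL new limit makes
		 * this a pure query, so nothing is modified. */
		if (syscall(__NR_prlimit64, 0, RLIMIT_NOFILE, NULL, &old) != 0) {
			perror("prlimit64");
			return 1;
		}
		printf("RLIMIT_NOFILE: cur=%llu max=%llu\n",
		       old.rlim_cur, old.rlim_max);
		return 0;
	}

Built with a plain cross gcc and run on the patched kernel this prints the current file-descriptor limit; on an unpatched kernel the call fails with ENOSYS, which is itself a quick way to verify the table entry landed.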
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index afeb71f..5c26ecc 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -376,6 +376,9 @@ CALL(sys_perf_event_open) /* 365 */ CALL(sys_recvmmsg) CALL(sys_accept4) + CALL(sys_fanotify_init) + CALL(sys_fanotify_mark) + CALL(sys_prlimit64) #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index f05a35a..1b56082 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -418,11 +418,13 @@ ENDPROC(sys_clone_wrapper) sys_sigreturn_wrapper: add r0, sp, #S_OFF + mov why, #0 @ prevent syscall restart handling b sys_sigreturn ENDPROC(sys_sigreturn_wrapper) sys_rt_sigreturn_wrapper: add r0, sp, #S_OFF + mov why, #0 @ prevent syscall restart handling b sys_rt_sigreturn ENDPROC(sys_rt_sigreturn_wrapper) diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 417c392..ecbb028 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -319,8 +319,8 @@ validate_event(struct cpu_hw_events *cpuc, { struct hw_perf_event fake_event = event->hw; - if (event->pmu && event->pmu != &pmu) - return 0; + if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF) + return 1; return armpmu->get_event_idx(cpuc, &fake_event) >= 0; } @@ -1041,8 +1041,8 @@ armv6pmu_handle_irq(int irq_num, /* * Handle the pending perf events. * - * Note: this call *must* be run with interrupts enabled. For - * platforms that can have the PMU interrupts raised as a PMI, this + * Note: this call *must* be run with interrupts disabled. For + * platforms that can have the PMU interrupts raised as an NMI, this * will not work. */ perf_event_do_pending(); @@ -2017,8 +2017,8 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) /* * Handle the pending perf events. * - * Note: this call *must* be run with interrupts enabled. For - * platforms that can have the PMU interrupts raised as a PMI, this + * Note: this call *must* be run with interrupts disabled. For + * platforms that can have the PMU interrupts raised as an NMI, this * will not work. 
*/ perf_event_do_pending(); diff --git a/arch/arm/mach-at91/at91sam9g45.c b/arch/arm/mach-at91/at91sam9g45.c index 753c0d3..c67b47f 100644 --- a/arch/arm/mach-at91/at91sam9g45.c +++ b/arch/arm/mach-at91/at91sam9g45.c @@ -121,8 +121,8 @@ static struct clk ssc1_clk = { .pmc_mask = 1 << AT91SAM9G45_ID_SSC1, .type = CLK_TYPE_PERIPHERAL, }; -static struct clk tcb_clk = { - .name = "tcb_clk", +static struct clk tcb0_clk = { + .name = "tcb0_clk", .pmc_mask = 1 << AT91SAM9G45_ID_TCB, .type = CLK_TYPE_PERIPHERAL, }; @@ -192,6 +192,14 @@ static struct clk ohci_clk = { .parent = &uhphs_clk, }; +/* One additional fake clock for second TC block */ +static struct clk tcb1_clk = { + .name = "tcb1_clk", + .pmc_mask = 0, + .type = CLK_TYPE_PERIPHERAL, + .parent = &tcb0_clk, +}; + static struct clk *periph_clocks[] __initdata = { &pioA_clk, &pioB_clk, @@ -208,7 +216,7 @@ static struct clk *periph_clocks[] __initdata = { &spi1_clk, &ssc0_clk, &ssc1_clk, - &tcb_clk, + &tcb0_clk, &pwm_clk, &tsc_clk, &dma_clk, @@ -221,6 +229,7 @@ static struct clk *periph_clocks[] __initdata = { &mmc1_clk, // irq0 &ohci_clk, + &tcb1_clk, }; /* diff --git a/arch/arm/mach-at91/at91sam9g45_devices.c b/arch/arm/mach-at91/at91sam9g45_devices.c index 809114d..5e71ccd 100644 --- a/arch/arm/mach-at91/at91sam9g45_devices.c +++ b/arch/arm/mach-at91/at91sam9g45_devices.c @@ -46,7 +46,7 @@ static struct resource hdmac_resources[] = { .end = AT91_BASE_SYS + AT91_DMA + SZ_512 - 1, .flags = IORESOURCE_MEM, }, - [2] = { + [1] = { .start = AT91SAM9G45_ID_DMA, .end = AT91SAM9G45_ID_DMA, .flags = IORESOURCE_IRQ, @@ -835,9 +835,9 @@ static struct platform_device at91sam9g45_tcb1_device = { static void __init at91_add_device_tc(void) { /* this chip has one clock and irq for all six TC channels */ - at91_clock_associate("tcb_clk", &at91sam9g45_tcb0_device.dev, "t0_clk"); + at91_clock_associate("tcb0_clk", &at91sam9g45_tcb0_device.dev, "t0_clk"); platform_device_register(&at91sam9g45_tcb0_device); - at91_clock_associate("tcb_clk", &at91sam9g45_tcb1_device.dev, "t0_clk"); + at91_clock_associate("tcb1_clk", &at91sam9g45_tcb1_device.dev, "t0_clk"); platform_device_register(&at91sam9g45_tcb1_device); } #else diff --git a/arch/arm/mach-at91/board-sam9261ek.c b/arch/arm/mach-at91/board-sam9261ek.c index c4c8865..65eb094 100644 --- a/arch/arm/mach-at91/board-sam9261ek.c +++ b/arch/arm/mach-at91/board-sam9261ek.c @@ -93,11 +93,12 @@ static struct resource dm9000_resource[] = { .start = AT91_PIN_PC11, .end = AT91_PIN_PC11, .flags = IORESOURCE_IRQ + | IORESOURCE_IRQ_LOWEDGE | IORESOURCE_IRQ_HIGHEDGE, } }; static struct dm9000_plat_data dm9000_platdata = { - .flags = DM9000_PLATF_16BITONLY, + .flags = DM9000_PLATF_16BITONLY | DM9000_PLATF_NO_EEPROM, }; static struct platform_device dm9000_device = { @@ -168,17 +169,6 @@ static struct at91_udc_data __initdata ek_udc_data = { /* - * MCI (SD/MMC) - */ -static struct at91_mmc_data __initdata ek_mmc_data = { - .wire4 = 1, -// .det_pin = ... not connected -// .wp_pin = ... not connected -// .vcc_pin = ... 
not connected -}; - - -/* * NAND flash */ static struct mtd_partition __initdata ek_nand_partition[] = { @@ -246,6 +236,10 @@ static void __init ek_add_device_nand(void) at91_add_device_nand(&ek_nand_data); } +/* + * SPI related devices + */ +#if defined(CONFIG_SPI_ATMEL) || defined(CONFIG_SPI_ATMEL_MODULE) /* * ADS7846 Touchscreen @@ -356,6 +350,19 @@ static struct spi_board_info ek_spi_devices[] = { #endif }; +#else /* CONFIG_SPI_ATMEL_* */ +/* spi0 and mmc/sd share the same PIO pins: cannot be used at the same time */ + +/* + * MCI (SD/MMC) + * det_pin, wp_pin and vcc_pin are not connected + */ +static struct at91_mmc_data __initdata ek_mmc_data = { + .wire4 = 1, +}; + +#endif /* CONFIG_SPI_ATMEL_* */ + /* * LCD Controller diff --git a/arch/arm/mach-at91/clock.c b/arch/arm/mach-at91/clock.c index 7f7da43..7525cee 100644 --- a/arch/arm/mach-at91/clock.c +++ b/arch/arm/mach-at91/clock.c @@ -501,7 +501,8 @@ postcore_initcall(at91_clk_debugfs_init); int __init clk_register(struct clk *clk) { if (clk_is_peripheral(clk)) { - clk->parent = &mck; + if (!clk->parent) + clk->parent = &mck; clk->mode = pmc_periph_mode; list_add_tail(&clk->node, &clocks); } diff --git a/arch/arm/mach-ep93xx/clock.c b/arch/arm/mach-ep93xx/clock.c index 8bf3cec..4566bd1 100644 --- a/arch/arm/mach-ep93xx/clock.c +++ b/arch/arm/mach-ep93xx/clock.c @@ -560,4 +560,4 @@ static int __init ep93xx_clock_init(void) clkdev_add_table(clocks, ARRAY_SIZE(clocks)); return 0; } -arch_initcall(ep93xx_clock_init); +postcore_initcall(ep93xx_clock_init); diff --git a/arch/arm/mach-mx25/eukrea_mbimxsd-baseboard.c b/arch/arm/mach-mx25/eukrea_mbimxsd-baseboard.c index 91931dc..4aaadc7 100644 --- a/arch/arm/mach-mx25/eukrea_mbimxsd-baseboard.c +++ b/arch/arm/mach-mx25/eukrea_mbimxsd-baseboard.c @@ -215,7 +215,7 @@ struct imx_ssi_platform_data eukrea_mbimxsd_ssi_pdata = { * Add platform devices present on this baseboard and init * them from CPU side as far as required to use them later on */ -void __init eukrea_mbimxsd_baseboard_init(void) +void __init eukrea_mbimxsd25_baseboard_init(void) { if (mxc_iomux_v3_setup_multiple_pads(eukrea_mbimxsd_pads, ARRAY_SIZE(eukrea_mbimxsd_pads))) diff --git a/arch/arm/mach-mx25/mach-cpuimx25.c b/arch/arm/mach-mx25/mach-cpuimx25.c index a5f0174..e064bb3 100644 --- a/arch/arm/mach-mx25/mach-cpuimx25.c +++ b/arch/arm/mach-mx25/mach-cpuimx25.c @@ -147,8 +147,8 @@ static void __init eukrea_cpuimx25_init(void) if (!otg_mode_host) mxc_register_device(&otg_udc_device, &otg_device_pdata); -#ifdef CONFIG_MACH_EUKREA_MBIMXSD_BASEBOARD - eukrea_mbimxsd_baseboard_init(); +#ifdef CONFIG_MACH_EUKREA_MBIMXSD25_BASEBOARD + eukrea_mbimxsd25_baseboard_init(); #endif } diff --git a/arch/arm/mach-mx3/clock-imx35.c b/arch/arm/mach-mx3/clock-imx35.c index d3af0fd..7a62e74 100644 --- a/arch/arm/mach-mx3/clock-imx35.c +++ b/arch/arm/mach-mx3/clock-imx35.c @@ -155,7 +155,7 @@ static unsigned long get_rate_arm(void) aad = &clk_consumer[(pdr0 >> 16) & 0xf]; if (aad->sel) - fref = fref * 2 / 3; + fref = fref * 3 / 4; return fref / aad->arm; } @@ -164,7 +164,7 @@ static unsigned long get_rate_ahb(struct clk *clk) { unsigned long pdr0 = __raw_readl(CCM_BASE + CCM_PDR0); struct arm_ahb_div *aad; - unsigned long fref = get_rate_mpll(); + unsigned long fref = get_rate_arm(); aad = &clk_consumer[(pdr0 >> 16) & 0xf]; @@ -176,16 +176,11 @@ static unsigned long get_rate_ipg(struct clk *clk) return get_rate_ahb(NULL) >> 1; } -static unsigned long get_3_3_div(unsigned long in) -{ - return (((in >> 3) & 0x7) + 1) * ((in & 0x7) + 1); -} - static 
unsigned long get_rate_uart(struct clk *clk) { unsigned long pdr3 = __raw_readl(CCM_BASE + CCM_PDR3); unsigned long pdr4 = __raw_readl(CCM_BASE + CCM_PDR4); - unsigned long div = get_3_3_div(pdr4 >> 10); + unsigned long div = ((pdr4 >> 10) & 0x3f) + 1; if (pdr3 & (1 << 14)) return get_rate_arm() / div; @@ -216,7 +211,7 @@ static unsigned long get_rate_sdhc(struct clk *clk) break; } - return rate / get_3_3_div(div); + return rate / (div + 1); } static unsigned long get_rate_mshc(struct clk *clk) @@ -270,7 +265,7 @@ static unsigned long get_rate_csi(struct clk *clk) else rate = get_rate_ppll(); - return rate / get_3_3_div((pdr2 >> 16) & 0x3f); + return rate / (((pdr2 >> 16) & 0x3f) + 1); } static unsigned long get_rate_otg(struct clk *clk) @@ -283,25 +278,51 @@ static unsigned long get_rate_otg(struct clk *clk) else rate = get_rate_ppll(); - return rate / get_3_3_div((pdr4 >> 22) & 0x3f); + return rate / (((pdr4 >> 22) & 0x3f) + 1); } static unsigned long get_rate_ipg_per(struct clk *clk) { unsigned long pdr0 = __raw_readl(CCM_BASE + CCM_PDR0); unsigned long pdr4 = __raw_readl(CCM_BASE + CCM_PDR4); - unsigned long div1, div2; + unsigned long div; if (pdr0 & (1 << 26)) { - div1 = (pdr4 >> 19) & 0x7; - div2 = (pdr4 >> 16) & 0x7; - return get_rate_arm() / ((div1 + 1) * (div2 + 1)); + div = (pdr4 >> 16) & 0x3f; + return get_rate_arm() / (div + 1); } else { - div1 = (pdr0 >> 12) & 0x7; - return get_rate_ahb(NULL) / div1; + div = (pdr0 >> 12) & 0x7; + return get_rate_ahb(NULL) / (div + 1); } } +static unsigned long get_rate_hsp(struct clk *clk) +{ + unsigned long hsp_podf = (__raw_readl(CCM_BASE + CCM_PDR0) >> 20) & 0x03; + unsigned long fref = get_rate_mpll(); + + if (fref > 400 * 1000 * 1000) { + switch (hsp_podf) { + case 0: + return fref >> 2; + case 1: + return fref >> 3; + case 2: + return fref / 3; + } + } else { + switch (hsp_podf) { + case 0: + case 2: + return fref / 3; + case 1: + return fref / 6; + } + } + + return 0; +} + static int clk_cgr_enable(struct clk *clk) { u32 reg; @@ -359,7 +380,7 @@ DEFINE_CLOCK(i2c1_clk, 0, CCM_CGR1, 10, get_rate_ipg_per, NULL); DEFINE_CLOCK(i2c2_clk, 1, CCM_CGR1, 12, get_rate_ipg_per, NULL); DEFINE_CLOCK(i2c3_clk, 2, CCM_CGR1, 14, get_rate_ipg_per, NULL); DEFINE_CLOCK(iomuxc_clk, 0, CCM_CGR1, 16, NULL, NULL); -DEFINE_CLOCK(ipu_clk, 0, CCM_CGR1, 18, get_rate_ahb, NULL); +DEFINE_CLOCK(ipu_clk, 0, CCM_CGR1, 18, get_rate_hsp, NULL); DEFINE_CLOCK(kpp_clk, 0, CCM_CGR1, 20, get_rate_ipg, NULL); DEFINE_CLOCK(mlb_clk, 0, CCM_CGR1, 22, get_rate_ahb, NULL); DEFINE_CLOCK(mshc_clk, 0, CCM_CGR1, 24, get_rate_mshc, NULL); @@ -485,10 +506,10 @@ static struct clk_lookup lookups[] = { int __init mx35_clocks_init() { - unsigned int ll = 0; + unsigned int cgr2 = 3 << 26, cgr3 = 0; #if defined(CONFIG_DEBUG_LL) && !defined(CONFIG_DEBUG_ICEDCC) - ll = (3 << 16); + cgr2 |= 3 << 16; #endif clkdev_add_table(lookups, ARRAY_SIZE(lookups)); @@ -499,8 +520,20 @@ int __init mx35_clocks_init() __raw_writel((3 << 18), CCM_BASE + CCM_CGR0); __raw_writel((3 << 2) | (3 << 4) | (3 << 6) | (3 << 8) | (3 << 16), CCM_BASE + CCM_CGR1); - __raw_writel((3 << 26) | ll, CCM_BASE + CCM_CGR2); - __raw_writel(0, CCM_BASE + CCM_CGR3); + + /* + * Check if we came up in internal boot mode. If yes, we need some + * extra clocks turned on, otherwise the MX35 boot ROM code will + * hang after a watchdog reset. 
+ */ + if (!(__raw_readl(CCM_BASE + CCM_RCSR) & (3 << 10))) { + /* Additionally turn on UART1, SCC, and IIM clocks */ + cgr2 |= 3 << 16 | 3 << 4; + cgr3 |= 3 << 2; + } + + __raw_writel(cgr2, CCM_BASE + CCM_CGR2); + __raw_writel(cgr3, CCM_BASE + CCM_CGR3); mxc_timer_init(&gpt_clk, MX35_IO_ADDRESS(MX35_GPT1_BASE_ADDR), MX35_INT_GPT); diff --git a/arch/arm/mach-mx3/eukrea_mbimxsd-baseboard.c b/arch/arm/mach-mx3/eukrea_mbimxsd-baseboard.c index 1dc5004..f8f15e3 100644 --- a/arch/arm/mach-mx3/eukrea_mbimxsd-baseboard.c +++ b/arch/arm/mach-mx3/eukrea_mbimxsd-baseboard.c @@ -216,7 +216,7 @@ struct imx_ssi_platform_data eukrea_mbimxsd_ssi_pdata = { * Add platform devices present on this baseboard and init * them from CPU side as far as required to use them later on */ -void __init eukrea_mbimxsd_baseboard_init(void) +void __init eukrea_mbimxsd35_baseboard_init(void) { if (mxc_iomux_v3_setup_multiple_pads(eukrea_mbimxsd_pads, ARRAY_SIZE(eukrea_mbimxsd_pads))) diff --git a/arch/arm/mach-mx3/mach-cpuimx35.c b/arch/arm/mach-mx3/mach-cpuimx35.c index 9770a6a..2a4f8b7 100644 --- a/arch/arm/mach-mx3/mach-cpuimx35.c +++ b/arch/arm/mach-mx3/mach-cpuimx35.c @@ -201,8 +201,8 @@ static void __init mxc_board_init(void) if (!otg_mode_host) mxc_register_device(&mxc_otg_udc_device, &otg_device_pdata); -#ifdef CONFIG_MACH_EUKREA_MBIMXSD_BASEBOARD - eukrea_mbimxsd_baseboard_init(); +#ifdef CONFIG_MACH_EUKREA_MBIMXSD35_BASEBOARD + eukrea_mbimxsd35_baseboard_init(); #endif } diff --git a/arch/arm/mach-mx5/clock-mx51.c b/arch/arm/mach-mx5/clock-mx51.c index 6af69de..57c10a9 100644 --- a/arch/arm/mach-mx5/clock-mx51.c +++ b/arch/arm/mach-mx5/clock-mx51.c @@ -56,7 +56,7 @@ static void _clk_ccgr_disable(struct clk *clk) { u32 reg; reg = __raw_readl(clk->enable_reg); - reg &= ~(MXC_CCM_CCGRx_MOD_OFF << clk->enable_shift); + reg &= ~(MXC_CCM_CCGRx_CG_MASK << clk->enable_shift); __raw_writel(reg, clk->enable_reg); } diff --git a/arch/arm/mach-pxa/cpufreq-pxa2xx.c b/arch/arm/mach-pxa/cpufreq-pxa2xx.c index 268a9bc..50d5939 100644 --- a/arch/arm/mach-pxa/cpufreq-pxa2xx.c +++ b/arch/arm/mach-pxa/cpufreq-pxa2xx.c @@ -398,7 +398,7 @@ static int pxa_set_target(struct cpufreq_policy *policy, return 0; } -static __init int pxa_cpufreq_init(struct cpufreq_policy *policy) +static int pxa_cpufreq_init(struct cpufreq_policy *policy) { int i; unsigned int freq; diff --git a/arch/arm/mach-pxa/cpufreq-pxa3xx.c b/arch/arm/mach-pxa/cpufreq-pxa3xx.c index 27fa329..0a0d0fe 100644 --- a/arch/arm/mach-pxa/cpufreq-pxa3xx.c +++ b/arch/arm/mach-pxa/cpufreq-pxa3xx.c @@ -204,7 +204,7 @@ static int pxa3xx_cpufreq_set(struct cpufreq_policy *policy, return 0; } -static __init int pxa3xx_cpufreq_init(struct cpufreq_policy *policy) +static int pxa3xx_cpufreq_init(struct cpufreq_policy *policy) { int ret = -EINVAL; diff --git a/arch/arm/mach-pxa/include/mach/mfp-pxa300.h b/arch/arm/mach-pxa/include/mach/mfp-pxa300.h index 7139e0d..4e12870 100644 --- a/arch/arm/mach-pxa/include/mach/mfp-pxa300.h +++ b/arch/arm/mach-pxa/include/mach/mfp-pxa300.h @@ -71,10 +71,10 @@ #define GPIO46_CI_DD_7 MFP_CFG_DRV(GPIO46, AF0, DS04X) #define GPIO47_CI_DD_8 MFP_CFG_DRV(GPIO47, AF1, DS04X) #define GPIO48_CI_DD_9 MFP_CFG_DRV(GPIO48, AF1, DS04X) -#define GPIO52_CI_HSYNC MFP_CFG_DRV(GPIO52, AF0, DS04X) -#define GPIO51_CI_VSYNC MFP_CFG_DRV(GPIO51, AF0, DS04X) #define GPIO49_CI_MCLK MFP_CFG_DRV(GPIO49, AF0, DS04X) #define GPIO50_CI_PCLK MFP_CFG_DRV(GPIO50, AF0, DS04X) +#define GPIO51_CI_HSYNC MFP_CFG_DRV(GPIO51, AF0, DS04X) +#define GPIO52_CI_VSYNC MFP_CFG_DRV(GPIO52, AF0, 
DS04X) /* KEYPAD */ #define GPIO3_KP_DKIN_6 MFP_CFG_LPM(GPIO3, AF2, FLOAT) diff --git a/arch/arm/mach-s3c64xx/dev-spi.c b/arch/arm/mach-s3c64xx/dev-spi.c index a492b98..405e621 100644 --- a/arch/arm/mach-s3c64xx/dev-spi.c +++ b/arch/arm/mach-s3c64xx/dev-spi.c @@ -18,10 +18,11 @@ #include #include #include +#include #include #include -#include +#include static char *spi_src_clks[] = { [S3C64XX_SPI_SRCCLK_PCLK] = "pclk", diff --git a/arch/arm/mach-s3c64xx/mach-real6410.c b/arch/arm/mach-s3c64xx/mach-real6410.c index 5c07d01..e130379 100644 --- a/arch/arm/mach-s3c64xx/mach-real6410.c +++ b/arch/arm/mach-s3c64xx/mach-real6410.c @@ -30,73 +30,73 @@ #include #include -#define UCON S3C2410_UCON_DEFAULT | S3C2410_UCON_UCLK -#define ULCON S3C2410_LCON_CS8 | S3C2410_LCON_PNONE | S3C2410_LCON_STOPB -#define UFCON S3C2410_UFCON_RXTRIG8 | S3C2410_UFCON_FIFOMODE +#define UCON (S3C2410_UCON_DEFAULT | S3C2410_UCON_UCLK) +#define ULCON (S3C2410_LCON_CS8 | S3C2410_LCON_PNONE | S3C2410_LCON_STOPB) +#define UFCON (S3C2410_UFCON_RXTRIG8 | S3C2410_UFCON_FIFOMODE) static struct s3c2410_uartcfg real6410_uartcfgs[] __initdata = { [0] = { - .hwport = 0, - .flags = 0, - .ucon = UCON, - .ulcon = ULCON, - .ufcon = UFCON, + .hwport = 0, + .flags = 0, + .ucon = UCON, + .ulcon = ULCON, + .ufcon = UFCON, }, [1] = { - .hwport = 1, - .flags = 0, - .ucon = UCON, - .ulcon = ULCON, - .ufcon = UFCON, + .hwport = 1, + .flags = 0, + .ucon = UCON, + .ulcon = ULCON, + .ufcon = UFCON, }, [2] = { - .hwport = 2, - .flags = 0, - .ucon = UCON, - .ulcon = ULCON, - .ufcon = UFCON, + .hwport = 2, + .flags = 0, + .ucon = UCON, + .ulcon = ULCON, + .ufcon = UFCON, }, [3] = { - .hwport = 3, - .flags = 0, - .ucon = UCON, - .ulcon = ULCON, - .ufcon = UFCON, + .hwport = 3, + .flags = 0, + .ucon = UCON, + .ulcon = ULCON, + .ufcon = UFCON, }, }; /* DM9000AEP 10/100 ethernet controller */ static struct resource real6410_dm9k_resource[] = { - [0] = { - .start = S3C64XX_PA_XM0CSN1, - .end = S3C64XX_PA_XM0CSN1 + 1, - .flags = IORESOURCE_MEM - }, - [1] = { - .start = S3C64XX_PA_XM0CSN1 + 4, - .end = S3C64XX_PA_XM0CSN1 + 5, - .flags = IORESOURCE_MEM - }, - [2] = { - .start = S3C_EINT(7), - .end = S3C_EINT(7), - .flags = IORESOURCE_IRQ, - } + [0] = { + .start = S3C64XX_PA_XM0CSN1, + .end = S3C64XX_PA_XM0CSN1 + 1, + .flags = IORESOURCE_MEM + }, + [1] = { + .start = S3C64XX_PA_XM0CSN1 + 4, + .end = S3C64XX_PA_XM0CSN1 + 5, + .flags = IORESOURCE_MEM + }, + [2] = { + .start = S3C_EINT(7), + .end = S3C_EINT(7), + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL + } }; static struct dm9000_plat_data real6410_dm9k_pdata = { - .flags = (DM9000_PLATF_16BITONLY | DM9000_PLATF_NO_EEPROM), + .flags = (DM9000_PLATF_16BITONLY | DM9000_PLATF_NO_EEPROM), }; static struct platform_device real6410_device_eth = { - .name = "dm9000", - .id = -1, - .num_resources = ARRAY_SIZE(real6410_dm9k_resource), - .resource = real6410_dm9k_resource, - .dev = { - .platform_data = &real6410_dm9k_pdata, - }, + .name = "dm9000", + .id = -1, + .num_resources = ARRAY_SIZE(real6410_dm9k_resource), + .resource = real6410_dm9k_resource, + .dev = { + .platform_data = &real6410_dm9k_pdata, + }, }; static struct platform_device *real6410_devices[] __initdata = { @@ -129,12 +129,12 @@ static void __init real6410_machine_init(void) /* set timing for nCS1 suitable for ethernet chip */ __raw_writel((0 << S3C64XX_SROM_BCX__PMC__SHIFT) | - (6 << S3C64XX_SROM_BCX__TACP__SHIFT) | - (4 << S3C64XX_SROM_BCX__TCAH__SHIFT) | - (1 << S3C64XX_SROM_BCX__TCOH__SHIFT) | - (13 << S3C64XX_SROM_BCX__TACC__SHIFT) | - (4 
<< S3C64XX_SROM_BCX__TCOS__SHIFT) | - (0 << S3C64XX_SROM_BCX__TACS__SHIFT), S3C64XX_SROM_BC1); + (6 << S3C64XX_SROM_BCX__TACP__SHIFT) | + (4 << S3C64XX_SROM_BCX__TCAH__SHIFT) | + (1 << S3C64XX_SROM_BCX__TCOH__SHIFT) | + (13 << S3C64XX_SROM_BCX__TACC__SHIFT) | + (4 << S3C64XX_SROM_BCX__TCOS__SHIFT) | + (0 << S3C64XX_SROM_BCX__TACS__SHIFT), S3C64XX_SROM_BC1); platform_add_devices(real6410_devices, ARRAY_SIZE(real6410_devices)); } diff --git a/arch/arm/mach-s5pv210/clock.c b/arch/arm/mach-s5pv210/clock.c index af91fef..cfecd70 100644 --- a/arch/arm/mach-s5pv210/clock.c +++ b/arch/arm/mach-s5pv210/clock.c @@ -281,6 +281,24 @@ static struct clk init_clocks_disable[] = { .enable = s5pv210_clk_ip0_ctrl, .ctrlbit = (1<<29), }, { + .name = "fimc", + .id = 0, + .parent = &clk_hclk_dsys.clk, + .enable = s5pv210_clk_ip0_ctrl, + .ctrlbit = (1 << 24), + }, { + .name = "fimc", + .id = 1, + .parent = &clk_hclk_dsys.clk, + .enable = s5pv210_clk_ip0_ctrl, + .ctrlbit = (1 << 25), + }, { + .name = "fimc", + .id = 2, + .parent = &clk_hclk_dsys.clk, + .enable = s5pv210_clk_ip0_ctrl, + .ctrlbit = (1 << 26), + }, { .name = "otg", .id = -1, .parent = &clk_hclk_psys.clk, @@ -357,7 +375,7 @@ static struct clk init_clocks_disable[] = { .id = 1, .parent = &clk_pclk_psys.clk, .enable = s5pv210_clk_ip3_ctrl, - .ctrlbit = (1<<8), + .ctrlbit = (1 << 10), }, { .name = "i2c", .id = 2, diff --git a/arch/arm/mach-s5pv210/cpu.c b/arch/arm/mach-s5pv210/cpu.c index b9f4d67..77f456c 100644 --- a/arch/arm/mach-s5pv210/cpu.c +++ b/arch/arm/mach-s5pv210/cpu.c @@ -47,7 +47,7 @@ static struct map_desc s5pv210_iodesc[] __initdata = { { .virtual = (unsigned long)S5P_VA_SYSTIMER, .pfn = __phys_to_pfn(S5PV210_PA_SYSTIMER), - .length = SZ_1M, + .length = SZ_4K, .type = MT_DEVICE, }, { .virtual = (unsigned long)VA_VIC2, diff --git a/arch/arm/mach-shmobile/Makefile b/arch/arm/mach-shmobile/Makefile index 5e16b4c..ae416fe 100644 --- a/arch/arm/mach-shmobile/Makefile +++ b/arch/arm/mach-shmobile/Makefile @@ -3,7 +3,7 @@ # # Common objects -obj-y := timer.o console.o clock.o +obj-y := timer.o console.o clock.o pm_runtime.o # CPU objects obj-$(CONFIG_ARCH_SH7367) += setup-sh7367.o clock-sh7367.o intc-sh7367.o diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index 23d472f..95935c8 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -39,6 +40,7 @@ #include #include #include +#include #include #include @@ -307,6 +309,7 @@ static struct sh_mobile_sdhi_info sdhi1_info = { .dma_slave_tx = SHDMA_SLAVE_SDHI1_TX, .dma_slave_rx = SHDMA_SLAVE_SDHI1_RX, .tmio_ocr_mask = MMC_VDD_165_195, + .tmio_flags = TMIO_MMC_WRPROTECT_DISABLE, }; static struct resource sdhi1_resources[] = { @@ -558,7 +561,7 @@ static struct resource fsi_resources[] = { static struct platform_device fsi_device = { .name = "sh_fsi2", - .id = 0, + .id = -1, .num_resources = ARRAY_SIZE(fsi_resources), .resource = fsi_resources, .dev = { @@ -650,7 +653,44 @@ static struct platform_device hdmi_device = { }, }; +static struct gpio_led ap4evb_leds[] = { + { + .name = "led4", + .gpio = GPIO_PORT185, + .default_state = LEDS_GPIO_DEFSTATE_ON, + }, + { + .name = "led2", + .gpio = GPIO_PORT186, + .default_state = LEDS_GPIO_DEFSTATE_ON, + }, + { + .name = "led3", + .gpio = GPIO_PORT187, + .default_state = LEDS_GPIO_DEFSTATE_ON, + }, + { + .name = "led1", + .gpio = GPIO_PORT188, + .default_state = LEDS_GPIO_DEFSTATE_ON, + } +}; + 
+static struct gpio_led_platform_data ap4evb_leds_pdata = { + .num_leds = ARRAY_SIZE(ap4evb_leds), + .leds = ap4evb_leds, +}; + +static struct platform_device leds_device = { + .name = "leds-gpio", + .id = 0, + .dev = { + .platform_data = &ap4evb_leds_pdata, + }, +}; + static struct platform_device *ap4evb_devices[] __initdata = { + &leds_device, &nor_flash_device, &smc911x_device, &sdhi0_device, @@ -840,20 +880,6 @@ static void __init ap4evb_init(void) gpio_request(GPIO_FN_CS5A, NULL); gpio_request(GPIO_FN_IRQ6_39, NULL); - /* enable LED 1 - 4 */ - gpio_request(GPIO_PORT185, NULL); - gpio_request(GPIO_PORT186, NULL); - gpio_request(GPIO_PORT187, NULL); - gpio_request(GPIO_PORT188, NULL); - gpio_direction_output(GPIO_PORT185, 1); - gpio_direction_output(GPIO_PORT186, 1); - gpio_direction_output(GPIO_PORT187, 1); - gpio_direction_output(GPIO_PORT188, 1); - gpio_export(GPIO_PORT185, 0); - gpio_export(GPIO_PORT186, 0); - gpio_export(GPIO_PORT187, 0); - gpio_export(GPIO_PORT188, 0); - /* enable Debug switch (S6) */ gpio_request(GPIO_PORT32, NULL); gpio_request(GPIO_PORT33, NULL); diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index fb4e9b1..7594689 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -286,7 +286,6 @@ static struct clk_ops pllc2_clk_ops = { struct clk pllc2_clk = { .ops = &pllc2_clk_ops, - .flags = CLK_ENABLE_ON_INIT, .parent = &extal1_div2_clk, .freq_table = pllc2_freq_table, .parent_table = pllc2_parent, @@ -395,7 +394,7 @@ static struct clk div6_reparent_clks[DIV6_REPARENT_NR] = { enum { MSTP001, MSTP131, MSTP130, - MSTP129, MSTP128, + MSTP129, MSTP128, MSTP127, MSTP126, MSTP118, MSTP117, MSTP116, MSTP106, MSTP101, MSTP100, MSTP223, @@ -413,6 +412,8 @@ static struct clk mstp_clks[MSTP_NR] = { [MSTP130] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 30, 0), /* VEU2 */ [MSTP129] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 29, 0), /* VEU1 */ [MSTP128] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 28, 0), /* VEU0 */ + [MSTP127] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 27, 0), /* CEU */ + [MSTP126] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 26, 0), /* CSI2 */ [MSTP118] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 18, 0), /* DSITX */ [MSTP117] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 17, 0), /* LCDC1 */ [MSTP116] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR1, 16, 0), /* IIC0 */ @@ -428,7 +429,7 @@ static struct clk mstp_clks[MSTP_NR] = { [MSTP201] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 1, 0), /* SCIFA3 */ [MSTP200] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 0, 0), /* SCIFA4 */ [MSTP329] = MSTP(&r_clk, SMSTPCR3, 29, 0), /* CMT10 */ - [MSTP328] = MSTP(&div6_clks[DIV6_SPU], SMSTPCR3, 28, CLK_ENABLE_ON_INIT), /* FSIA */ + [MSTP328] = MSTP(&div6_clks[DIV6_SPU], SMSTPCR3, 28, 0), /* FSIA */ [MSTP323] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR3, 23, 0), /* IIC1 */ [MSTP322] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR3, 22, 0), /* USB0 */ [MSTP314] = MSTP(&div4_clks[DIV4_HP], SMSTPCR3, 14, 0), /* SDHI0 */ @@ -498,6 +499,8 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("uio_pdrv_genirq.3", &mstp_clks[MSTP130]), /* VEU2 */ CLKDEV_DEV_ID("uio_pdrv_genirq.2", &mstp_clks[MSTP129]), /* VEU1 */ CLKDEV_DEV_ID("uio_pdrv_genirq.1", &mstp_clks[MSTP128]), /* VEU0 */ + CLKDEV_DEV_ID("sh_mobile_ceu.0", &mstp_clks[MSTP127]), /* CEU */ + CLKDEV_DEV_ID("sh-mobile-csi2.0", &mstp_clks[MSTP126]), /* CSI2 */ CLKDEV_DEV_ID("sh-mipi-dsi.0", &mstp_clks[MSTP118]), /* DSITX */ CLKDEV_DEV_ID("sh_mobile_lcdc_fb.1", &mstp_clks[MSTP117]), /* LCDC1 */ CLKDEV_DEV_ID("i2c-sh_mobile.0", 
&mstp_clks[MSTP116]), /* IIC0 */ diff --git a/arch/arm/mach-shmobile/clock.c b/arch/arm/mach-shmobile/clock.c index b7c705a..6b7c7c4 100644 --- a/arch/arm/mach-shmobile/clock.c +++ b/arch/arm/mach-shmobile/clock.c @@ -1,8 +1,10 @@ /* - * SH-Mobile Timer + * SH-Mobile Clock Framework * * Copyright (C) 2010 Magnus Damm * + * Used together with arch/arm/common/clkdev.c and drivers/sh/clk.c. + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2 of the License. diff --git a/arch/arm/mach-shmobile/pm_runtime.c b/arch/arm/mach-shmobile/pm_runtime.c new file mode 100644 index 0000000..94912d3 --- /dev/null +++ b/arch/arm/mach-shmobile/pm_runtime.c @@ -0,0 +1,169 @@ +/* + * arch/arm/mach-shmobile/pm_runtime.c + * + * Runtime PM support code for SuperH Mobile ARM + * + * Copyright (C) 2009-2010 Magnus Damm + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_PM_RUNTIME +#define BIT_ONCE 0 +#define BIT_ACTIVE 1 +#define BIT_CLK_ENABLED 2 + +struct pm_runtime_data { + unsigned long flags; + struct clk *clk; +}; + +static void __devres_release(struct device *dev, void *res) +{ + struct pm_runtime_data *prd = res; + + dev_dbg(dev, "__devres_release()\n"); + + if (test_bit(BIT_CLK_ENABLED, &prd->flags)) + clk_disable(prd->clk); + + if (test_bit(BIT_ACTIVE, &prd->flags)) + clk_put(prd->clk); +} + +static struct pm_runtime_data *__to_prd(struct device *dev) +{ + return devres_find(dev, __devres_release, NULL, NULL); +} + +static void platform_pm_runtime_init(struct device *dev, + struct pm_runtime_data *prd) +{ + if (prd && !test_and_set_bit(BIT_ONCE, &prd->flags)) { + prd->clk = clk_get(dev, NULL); + if (!IS_ERR(prd->clk)) { + set_bit(BIT_ACTIVE, &prd->flags); + dev_info(dev, "clocks managed by runtime pm\n"); + } + } +} + +static void platform_pm_runtime_bug(struct device *dev, + struct pm_runtime_data *prd) +{ + if (prd && !test_and_set_bit(BIT_ONCE, &prd->flags)) + dev_err(dev, "runtime pm suspend before resume\n"); +} + +int platform_pm_runtime_suspend(struct device *dev) +{ + struct pm_runtime_data *prd = __to_prd(dev); + + dev_dbg(dev, "platform_pm_runtime_suspend()\n"); + + platform_pm_runtime_bug(dev, prd); + + if (prd && test_bit(BIT_ACTIVE, &prd->flags)) { + clk_disable(prd->clk); + clear_bit(BIT_CLK_ENABLED, &prd->flags); + } + + return 0; +} + +int platform_pm_runtime_resume(struct device *dev) +{ + struct pm_runtime_data *prd = __to_prd(dev); + + dev_dbg(dev, "platform_pm_runtime_resume()\n"); + + platform_pm_runtime_init(dev, prd); + + if (prd && test_bit(BIT_ACTIVE, &prd->flags)) { + clk_enable(prd->clk); + set_bit(BIT_CLK_ENABLED, &prd->flags); + } + + return 0; +} + +int platform_pm_runtime_idle(struct device *dev) +{ + /* suspend synchronously to disable clocks immediately */ + return pm_runtime_suspend(dev); +} + +static int platform_bus_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + struct pm_runtime_data *prd; + + dev_dbg(dev, "platform_bus_notify() %ld !\n", action); + + if (action == BUS_NOTIFY_BIND_DRIVER) { + prd = devres_alloc(__devres_release, sizeof(*prd), GFP_KERNEL); + if (prd) + devres_add(dev, prd); + else + dev_err(dev, "unable to alloc memory for runtime 
pm\n"); + } + + return 0; +} + +#else /* CONFIG_PM_RUNTIME */ + +static int platform_bus_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + struct clk *clk; + + dev_dbg(dev, "platform_bus_notify() %ld !\n", action); + + switch (action) { + case BUS_NOTIFY_BIND_DRIVER: + clk = clk_get(dev, NULL); + if (!IS_ERR(clk)) { + clk_enable(clk); + clk_put(clk); + dev_info(dev, "runtime pm disabled, clock forced on\n"); + } + break; + case BUS_NOTIFY_UNBOUND_DRIVER: + clk = clk_get(dev, NULL); + if (!IS_ERR(clk)) { + clk_disable(clk); + clk_put(clk); + dev_info(dev, "runtime pm disabled, clock forced off\n"); + } + break; + } + + return 0; +} + +#endif /* CONFIG_PM_RUNTIME */ + +static struct notifier_block platform_bus_notifier = { + .notifier_call = platform_bus_notify +}; + +static int __init sh_pm_runtime_init(void) +{ + bus_register_notifier(&platform_bus_type, &platform_bus_notifier); + return 0; +} +core_initcall(sh_pm_runtime_init); diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 33c3f57..a0a2928 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -398,7 +398,7 @@ config CPU_V6 # ARMv6k config CPU_32v6K bool "Support ARM V6K processor extensions" if !SMP - depends on CPU_V6 + depends on CPU_V6 || CPU_V7 default y if SMP && !(ARCH_MX3 || ARCH_OMAP2) help Say Y here if your ARMv6 processor supports the 'K' extension. diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index c704eed..4bc43e5 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -229,6 +229,8 @@ __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot) } } while (size -= PAGE_SIZE); + dsb(); + return (void *)c->vm_start; } return NULL; diff --git a/arch/arm/plat-mxc/Kconfig b/arch/arm/plat-mxc/Kconfig index 0527e65..6785db4 100644 --- a/arch/arm/plat-mxc/Kconfig +++ b/arch/arm/plat-mxc/Kconfig @@ -43,6 +43,7 @@ config ARCH_MXC91231 config ARCH_MX5 bool "MX5-based" select CPU_V7 + select ARM_L1_CACHE_SHIFT_6 help This enables support for systems based on the Freescale i.MX51 family diff --git a/arch/arm/plat-mxc/include/mach/eukrea-baseboards.h b/arch/arm/plat-mxc/include/mach/eukrea-baseboards.h index 634e3f4..656acb4 100644 --- a/arch/arm/plat-mxc/include/mach/eukrea-baseboards.h +++ b/arch/arm/plat-mxc/include/mach/eukrea-baseboards.h @@ -37,9 +37,9 @@ * mach-mx5/eukrea_mbimx51-baseboard.c for cpuimx51 */ -extern void eukrea_mbimx25_baseboard_init(void); +extern void eukrea_mbimxsd25_baseboard_init(void); extern void eukrea_mbimx27_baseboard_init(void); -extern void eukrea_mbimx35_baseboard_init(void); +extern void eukrea_mbimxsd35_baseboard_init(void); extern void eukrea_mbimx51_baseboard_init(void); #endif diff --git a/arch/arm/plat-mxc/tzic.c b/arch/arm/plat-mxc/tzic.c index b3da9aa..3703ab2 100644 --- a/arch/arm/plat-mxc/tzic.c +++ b/arch/arm/plat-mxc/tzic.c @@ -164,8 +164,9 @@ int tzic_enable_wake(int is_idle) return -EAGAIN; for (i = 0; i < 4; i++) { - v = is_idle ? __raw_readl(TZIC_ENSET0(i)) : wakeup_intr[i]; - __raw_writel(v, TZIC_WAKEUP0(i)); + v = is_idle ? 
__raw_readl(tzic_base + TZIC_ENSET0(i)) : + wakeup_intr[i]; + __raw_writel(v, tzic_base + TZIC_WAKEUP0(i)); } return 0; diff --git a/arch/arm/plat-pxa/pwm.c b/arch/arm/plat-pxa/pwm.c index 0732c6c..ef32686 100644 --- a/arch/arm/plat-pxa/pwm.c +++ b/arch/arm/plat-pxa/pwm.c @@ -176,7 +176,7 @@ static inline void __add_pwm(struct pwm_device *pwm) static int __devinit pwm_probe(struct platform_device *pdev) { - struct platform_device_id *id = platform_get_device_id(pdev); + const struct platform_device_id *id = platform_get_device_id(pdev); struct pwm_device *pwm, *secondary = NULL; struct resource *r; int ret = 0; diff --git a/arch/arm/plat-s5p/dev-fimc0.c b/arch/arm/plat-s5p/dev-fimc0.c index d3f1a9b..608770f 100644 --- a/arch/arm/plat-s5p/dev-fimc0.c +++ b/arch/arm/plat-s5p/dev-fimc0.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include @@ -18,7 +19,7 @@ static struct resource s5p_fimc0_resource[] = { [0] = { .start = S5P_PA_FIMC0, - .end = S5P_PA_FIMC0 + SZ_1M - 1, + .end = S5P_PA_FIMC0 + SZ_4K - 1, .flags = IORESOURCE_MEM, }, [1] = { @@ -28,9 +29,15 @@ static struct resource s5p_fimc0_resource[] = { }, }; +static u64 s5p_fimc0_dma_mask = DMA_BIT_MASK(32); + struct platform_device s5p_device_fimc0 = { .name = "s5p-fimc", .id = 0, .num_resources = ARRAY_SIZE(s5p_fimc0_resource), .resource = s5p_fimc0_resource, + .dev = { + .dma_mask = &s5p_fimc0_dma_mask, + .coherent_dma_mask = DMA_BIT_MASK(32), + }, }; diff --git a/arch/arm/plat-s5p/dev-fimc1.c b/arch/arm/plat-s5p/dev-fimc1.c index 41bd698..76e3a97 100644 --- a/arch/arm/plat-s5p/dev-fimc1.c +++ b/arch/arm/plat-s5p/dev-fimc1.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include @@ -18,7 +19,7 @@ static struct resource s5p_fimc1_resource[] = { [0] = { .start = S5P_PA_FIMC1, - .end = S5P_PA_FIMC1 + SZ_1M - 1, + .end = S5P_PA_FIMC1 + SZ_4K - 1, .flags = IORESOURCE_MEM, }, [1] = { @@ -28,9 +29,15 @@ static struct resource s5p_fimc1_resource[] = { }, }; +static u64 s5p_fimc1_dma_mask = DMA_BIT_MASK(32); + struct platform_device s5p_device_fimc1 = { .name = "s5p-fimc", .id = 1, .num_resources = ARRAY_SIZE(s5p_fimc1_resource), .resource = s5p_fimc1_resource, + .dev = { + .dma_mask = &s5p_fimc1_dma_mask, + .coherent_dma_mask = DMA_BIT_MASK(32), + }, }; diff --git a/arch/arm/plat-s5p/dev-fimc2.c b/arch/arm/plat-s5p/dev-fimc2.c index dfddeda..24d2981 100644 --- a/arch/arm/plat-s5p/dev-fimc2.c +++ b/arch/arm/plat-s5p/dev-fimc2.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include @@ -18,7 +19,7 @@ static struct resource s5p_fimc2_resource[] = { [0] = { .start = S5P_PA_FIMC2, - .end = S5P_PA_FIMC2 + SZ_1M - 1, + .end = S5P_PA_FIMC2 + SZ_4K - 1, .flags = IORESOURCE_MEM, }, [1] = { @@ -28,9 +29,15 @@ static struct resource s5p_fimc2_resource[] = { }, }; +static u64 s5p_fimc2_dma_mask = DMA_BIT_MASK(32); + struct platform_device s5p_device_fimc2 = { .name = "s5p-fimc", .id = 2, .num_resources = ARRAY_SIZE(s5p_fimc2_resource), .resource = s5p_fimc2_resource, + .dev = { + .dma_mask = &s5p_fimc2_dma_mask, + .coherent_dma_mask = DMA_BIT_MASK(32), + }, }; diff --git a/arch/arm/plat-samsung/gpio-config.c b/arch/arm/plat-samsung/gpio-config.c index 57b68a5..e3d41ea 100644 --- a/arch/arm/plat-samsung/gpio-config.c +++ b/arch/arm/plat-samsung/gpio-config.c @@ -273,13 +273,13 @@ s5p_gpio_drvstr_t s5p_gpio_get_drvstr(unsigned int pin) if (!chip) return -EINVAL; - off = chip->chip.base - pin; + off = pin - chip->chip.base; shift = off * 2; reg = chip->base + 0x0C; drvstr = __raw_readl(reg); - drvstr = 0xffff & (0x3 << 
shift); drvstr = drvstr >> shift; + drvstr &= 0x3; return (__force s5p_gpio_drvstr_t)drvstr; } @@ -296,11 +296,12 @@ int s5p_gpio_set_drvstr(unsigned int pin, s5p_gpio_drvstr_t drvstr) if (!chip) return -EINVAL; - off = chip->chip.base - pin; + off = pin - chip->chip.base; shift = off * 2; reg = chip->base + 0x0C; tmp = __raw_readl(reg); + tmp &= ~(0x3 << shift); tmp |= drvstr << shift; __raw_writel(tmp, reg); diff --git a/arch/arm/plat-samsung/include/plat/gpio-cfg.h b/arch/arm/plat-samsung/include/plat/gpio-cfg.h index db4112c..1c6b929 100644 --- a/arch/arm/plat-samsung/include/plat/gpio-cfg.h +++ b/arch/arm/plat-samsung/include/plat/gpio-cfg.h @@ -143,12 +143,12 @@ extern s3c_gpio_pull_t s3c_gpio_getpull(unsigned int pin); /* Define values for the drvstr available for each gpio pin. * * These values control the value of the output signal driver strength, - * configurable on most pins on the S5C series. + * configurable on most pins on the S5P series. */ -#define S5P_GPIO_DRVSTR_LV1 ((__force s5p_gpio_drvstr_t)0x00) -#define S5P_GPIO_DRVSTR_LV2 ((__force s5p_gpio_drvstr_t)0x01) -#define S5P_GPIO_DRVSTR_LV3 ((__force s5p_gpio_drvstr_t)0x10) -#define S5P_GPIO_DRVSTR_LV4 ((__force s5p_gpio_drvstr_t)0x11) +#define S5P_GPIO_DRVSTR_LV1 ((__force s5p_gpio_drvstr_t)0x0) +#define S5P_GPIO_DRVSTR_LV2 ((__force s5p_gpio_drvstr_t)0x2) +#define S5P_GPIO_DRVSTR_LV3 ((__force s5p_gpio_drvstr_t)0x1) +#define S5P_GPIO_DRVSTR_LV4 ((__force s5p_gpio_drvstr_t)0x3) /** * s5c_gpio_get_drvstr() - get the driver strength value of a gpio pin diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types index 48cbdcb..55590a4 100644 --- a/arch/arm/tools/mach-types +++ b/arch/arm/tools/mach-types @@ -12,7 +12,7 @@ # # http://www.arm.linux.org.uk/developer/machines/?action=new # -# Last update: Mon Jul 12 21:10:14 2010 +# Last update: Thu Sep 9 22:43:01 2010 # # machine_is_xxx CONFIG_xxxx MACH_TYPE_xxx number # @@ -2622,7 +2622,7 @@ kraken MACH_KRAKEN KRAKEN 2634 gw2388 MACH_GW2388 GW2388 2635 jadecpu MACH_JADECPU JADECPU 2636 carlisle MACH_CARLISLE CARLISLE 2637 -lux_sf9 MACH_LUX_SFT9 LUX_SFT9 2638 +lux_sf9 MACH_LUX_SF9 LUX_SF9 2638 nemid_tb MACH_NEMID_TB NEMID_TB 2639 terrier MACH_TERRIER TERRIER 2640 turbot MACH_TURBOT TURBOT 2641 @@ -2950,3 +2950,97 @@ davinci_dm365_dvr MACH_DAVINCI_DM365_DVR DAVINCI_DM365_DVR 2963 netviz MACH_NETVIZ NETVIZ 2964 flexibity MACH_FLEXIBITY FLEXIBITY 2965 wlan_computer MACH_WLAN_COMPUTER WLAN_COMPUTER 2966 +lpc24xx MACH_LPC24XX LPC24XX 2967 +spica MACH_SPICA SPICA 2968 +gpsdisplay MACH_GPSDISPLAY GPSDISPLAY 2969 +bipnet MACH_BIPNET BIPNET 2970 +overo_ctu_inertial MACH_OVERO_CTU_INERTIAL OVERO_CTU_INERTIAL 2971 +davinci_dm355_mmm MACH_DAVINCI_DM355_MMM DAVINCI_DM355_MMM 2972 +pc9260_v2 MACH_PC9260_V2 PC9260_V2 2973 +ptx7545 MACH_PTX7545 PTX7545 2974 +tm_efdc MACH_TM_EFDC TM_EFDC 2975 +omap3_waldo1 MACH_OMAP3_WALDO1 OMAP3_WALDO1 2977 +flyer MACH_FLYER FLYER 2978 +tornado3240 MACH_TORNADO3240 TORNADO3240 2979 +soli_01 MACH_SOLI_01 SOLI_01 2980 +omapl138_europalc MACH_OMAPL138_EUROPALC OMAPL138_EUROPALC 2981 +helios_v1 MACH_HELIOS_V1 HELIOS_V1 2982 +netspace_lite_v2 MACH_NETSPACE_LITE_V2 NETSPACE_LITE_V2 2983 +ssc MACH_SSC SSC 2984 +premierwave_en MACH_PREMIERWAVE_EN PREMIERWAVE_EN 2985 +wasabi MACH_WASABI WASABI 2986 +vivow MACH_VIVOW VIVOW 2987 +mx50_rdp MACH_MX50_RDP MX50_RDP 2988 +universal MACH_UNIVERSAL UNIVERSAL 2989 +real6410 MACH_REAL6410 REAL6410 2990 +spx_sakura MACH_SPX_SAKURA SPX_SAKURA 2991 +ij3k_2440 MACH_IJ3K_2440 IJ3K_2440 2992 +omap3_bc10 MACH_OMAP3_BC10 
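The two s5p_gpio_*_drvstr() fixes above are a matched pair of read-modify-write repairs: the pin offset was computed with the subtraction reversed, the read path masked the register before shifting, and the write path OR'ed the new bits in without clearing the old ones. The corrected shape, as a self-contained sketch of a packed 2-bit-per-pin register:

        #include <stdint.h>

        #define DRV_MASK  0x3u                      /* 2 bits per pin */

        static unsigned int drv_get(uint32_t reg, unsigned int off)
        {
                unsigned int shift = off * 2;

                return (reg >> shift) & DRV_MASK;   /* shift, then mask */
        }

        static uint32_t drv_set(uint32_t reg, unsigned int off,
                                unsigned int val)
        {
                unsigned int shift = off * 2;

                reg &= ~(DRV_MASK << shift);        /* clear old bits */
                reg |= (val & DRV_MASK) << shift;   /* then OR new ones */
                return reg;
        }

Without the clearing step, lowering a pin's drive strength could only ever set bits, never clear them, which is exactly what the added tmp &= ~(0x3 << shift) line fixes.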
OMAP3_BC10 2993 +thebe MACH_THEBE THEBE 2994 +rv082 MACH_RV082 RV082 2995 +armlguest MACH_ARMLGUEST ARMLGUEST 2996 +tjinc1000 MACH_TJINC1000 TJINC1000 2997 +dockstar MACH_DOCKSTAR DOCKSTAR 2998 +ax8008 MACH_AX8008 AX8008 2999 +gnet_sgce MACH_GNET_SGCE GNET_SGCE 3000 +pxwnas_500_1000 MACH_PXWNAS_500_1000 PXWNAS_500_1000 3001 +ea20 MACH_EA20 EA20 3002 +awm2 MACH_AWM2 AWM2 3003 +ti8148evm MACH_TI8148EVM TI8148EVM 3004 +tegra_seaboard MACH_TEGRA_SEABOARD TEGRA_SEABOARD 3005 +linkstation_chlv2 MACH_LINKSTATION_CHLV2 LINKSTATION_CHLV2 3006 +tera_pro2_rack MACH_TERA_PRO2_RACK TERA_PRO2_RACK 3007 +rubys MACH_RUBYS RUBYS 3008 +aquarius MACH_AQUARIUS AQUARIUS 3009 +mx53_ard MACH_MX53_ARD MX53_ARD 3010 +mx53_smd MACH_MX53_SMD MX53_SMD 3011 +lswxl MACH_LSWXL LSWXL 3012 +dove_avng_v3 MACH_DOVE_AVNG_V3 DOVE_AVNG_V3 3013 +sdi_ess_9263 MACH_SDI_ESS_9263 SDI_ESS_9263 3014 +jocpu550 MACH_JOCPU550 JOCPU550 3015 +msm8x60_rumi3 MACH_MSM8X60_RUMI3 MSM8X60_RUMI3 3016 +msm8x60_ffa MACH_MSM8X60_FFA MSM8X60_FFA 3017 +yanomami MACH_YANOMAMI YANOMAMI 3018 +gta04 MACH_GTA04 GTA04 3019 +cm_a510 MACH_CM_A510 CM_A510 3020 +omap3_rfs200 MACH_OMAP3_RFS200 OMAP3_RFS200 3021 +kx33xx MACH_KX33XX KX33XX 3022 +ptx7510 MACH_PTX7510 PTX7510 3023 +top9000 MACH_TOP9000 TOP9000 3024 +teenote MACH_TEENOTE TEENOTE 3025 +ts3 MACH_TS3 TS3 3026 +a0 MACH_A0 A0 3027 +fsm9xxx_surf MACH_FSM9XXX_SURF FSM9XXX_SURF 3028 +fsm9xxx_ffa MACH_FSM9XXX_FFA FSM9XXX_FFA 3029 +frrhwcdma60w MACH_FRRHWCDMA60W FRRHWCDMA60W 3030 +remus MACH_REMUS REMUS 3031 +at91cap7xdk MACH_AT91CAP7XDK AT91CAP7XDK 3032 +at91cap7stk MACH_AT91CAP7STK AT91CAP7STK 3033 +kt_sbc_sam9_1 MACH_KT_SBC_SAM9_1 KT_SBC_SAM9_1 3034 +oratisrouter MACH_ORATISROUTER ORATISROUTER 3035 +armada_xp_db MACH_ARMADA_XP_DB ARMADA_XP_DB 3036 +spdm MACH_SPDM SPDM 3037 +gtib MACH_GTIB GTIB 3038 +dgm3240 MACH_DGM3240 DGM3240 3039 +atlas_i_lpe MACH_ATLAS_I_LPE ATLAS_I_LPE 3040 +htcmega MACH_HTCMEGA HTCMEGA 3041 +tricorder MACH_TRICORDER TRICORDER 3042 +tx28 MACH_TX28 TX28 3043 +bstbrd MACH_BSTBRD BSTBRD 3044 +pwb3090 MACH_PWB3090 PWB3090 3045 +idea6410 MACH_IDEA6410 IDEA6410 3046 +qbc9263 MACH_QBC9263 QBC9263 3047 +borabora MACH_BORABORA BORABORA 3048 +valdez MACH_VALDEZ VALDEZ 3049 +ls9g20 MACH_LS9G20 LS9G20 3050 +mios_v1 MACH_MIOS_V1 MIOS_V1 3051 +s5pc110_crespo MACH_S5PC110_CRESPO S5PC110_CRESPO 3052 +controltek9g20 MACH_CONTROLTEK9G20 CONTROLTEK9G20 3053 +tin307 MACH_TIN307 TIN307 3054 +tin510 MACH_TIN510 TIN510 3055 +bluecheese MACH_BLUECHEESE BLUECHEESE 3057 +tem3x30 MACH_TEM3X30 TEM3X30 3058 +harvest_desoto MACH_HARVEST_DESOTO HARVEST_DESOTO 3059 +msm8x60_qrdc MACH_MSM8X60_QRDC MSM8X60_QRDC 3060 +spear900 MACH_SPEAR900 SPEAR900 3061 +pcontrol_g20 MACH_PCONTROL_G20 PCONTROL_G20 3062 diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h index e936804..984221a 100644 --- a/arch/h8300/include/asm/atomic.h +++ b/arch/h8300/include/asm/atomic.h @@ -18,7 +18,8 @@ static __inline__ int atomic_add_return(int i, atomic_t *v) { - int ret,flags; + unsigned long flags; + int ret; local_irq_save(flags); ret = v->counter += i; local_irq_restore(flags); @@ -30,7 +31,8 @@ static __inline__ int atomic_add_return(int i, atomic_t *v) static __inline__ int atomic_sub_return(int i, atomic_t *v) { - int ret,flags; + unsigned long flags; + int ret; local_irq_save(flags); ret = v->counter -= i; local_irq_restore(flags); @@ -42,7 +44,8 @@ static __inline__ int atomic_sub_return(int i, atomic_t *v) static __inline__ int atomic_inc_return(atomic_t *v) { - int ret,flags; + unsigned long flags; + int 
ret; local_irq_save(flags); v->counter++; ret = v->counter; @@ -64,7 +67,8 @@ static __inline__ int atomic_inc_return(atomic_t *v) static __inline__ int atomic_dec_return(atomic_t *v) { - int ret,flags; + unsigned long flags; + int ret; local_irq_save(flags); --v->counter; ret = v->counter; @@ -76,7 +80,8 @@ static __inline__ int atomic_dec_return(atomic_t *v) static __inline__ int atomic_dec_and_test(atomic_t *v) { - int ret,flags; + unsigned long flags; + int ret; local_irq_save(flags); --v->counter; ret = v->counter; diff --git a/arch/h8300/include/asm/system.h b/arch/h8300/include/asm/system.h index d98d976..16bf156 100644 --- a/arch/h8300/include/asm/system.h +++ b/arch/h8300/include/asm/system.h @@ -3,6 +3,8 @@ #include +struct pt_regs; + /* * switch_to(n) should switch tasks to task ptr, first checking that * ptr isn't the current task, in which case it does nothing. This @@ -155,6 +157,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz #define arch_align_stack(x) (x) -void die(char *str, struct pt_regs *fp, unsigned long err); +extern void die(const char *str, struct pt_regs *fp, unsigned long err); #endif /* _H8300_SYSTEM_H */ diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c index dc1ac02..aaf5e5a 100644 --- a/arch/h8300/kernel/sys_h8300.c +++ b/arch/h8300/kernel/sys_h8300.c @@ -56,8 +56,8 @@ int kernel_execve(const char *filename, const char *const envp[]) { register long res __asm__("er0"); - register char *const *_c __asm__("er3") = envp; - register char *const *_b __asm__("er2") = argv; + register const char *const *_c __asm__("er3") = envp; + register const char *const *_b __asm__("er2") = argv; register const char * _a __asm__("er1") = filename; __asm__ __volatile__ ("mov.l %1,er0\n\t" "trapa #0\n\t" diff --git a/arch/h8300/kernel/traps.c b/arch/h8300/kernel/traps.c index 3c0b66b..dfa05bd 100644 --- a/arch/h8300/kernel/traps.c +++ b/arch/h8300/kernel/traps.c @@ -96,7 +96,7 @@ static void dump(struct pt_regs *fp) printk("\n\n"); } -void die(char *str, struct pt_regs *fp, unsigned long err) +void die(const char *str, struct pt_regs *fp, unsigned long err) { static int diecount; diff --git a/arch/ia64/include/asm/compat.h b/arch/ia64/include/asm/compat.h index f90edc8..9301a28 100644 --- a/arch/ia64/include/asm/compat.h +++ b/arch/ia64/include/asm/compat.h @@ -199,7 +199,7 @@ ptr_to_compat(void __user *uptr) } static __inline__ void __user * -compat_alloc_user_space (long len) +arch_compat_alloc_user_space (long len) { struct pt_regs *regs = task_pt_regs(current); return (void __user *) (((regs->r12 & 0xffffffff) & -16) - len); diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index 3567d54..331d42b 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -420,22 +420,31 @@ EX(.fail_efault, ld8 r14=[r33]) // r14 <- *set ;; RSM_PSR_I(p0, r18, r19) // mask interrupt delivery - mov ar.ccv=0 andcm r14=r14,r17 // filter out SIGKILL & SIGSTOP + mov r8=EINVAL // default to EINVAL #ifdef CONFIG_SMP - mov r17=1 + // __ticket_spin_trylock(r31) + ld4 r17=[r31] ;; - cmpxchg4.acq r18=[r31],r17,ar.ccv // try to acquire the lock - mov r8=EINVAL // default to EINVAL + mov.m ar.ccv=r17 + extr.u r9=r17,17,15 + adds r19=1,r17 + extr.u r18=r17,0,15 + ;; + cmp.eq p6,p7=r9,r18 ;; +(p6) cmpxchg4.acq r9=[r31],r19,ar.ccv +(p6) dep.z r20=r19,1,15 // next serving ticket for unlock +(p7) br.cond.spnt.many .lock_contention + ;; + cmp4.eq p0,p7=r9,r17 + adds r31=2,r31 +(p7) br.cond.spnt.many .lock_contention ld8 r3=[r2] 
// re-read current->blocked now that we hold the lock - cmp4.ne p6,p0=r18,r0 -(p6) br.cond.spnt.many .lock_contention ;; #else ld8 r3=[r2] // re-read current->blocked now that we hold the lock - mov r8=EINVAL // default to EINVAL #endif add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16 add r19=IA64_TASK_SIGNAL_OFFSET,r16 @@ -490,7 +499,9 @@ EX(.fail_efault, ld8 r14=[r33]) // r14 <- *set (p6) br.cond.spnt.few 1b // yes -> retry #ifdef CONFIG_SMP - st4.rel [r31]=r0 // release the lock + // __ticket_spin_unlock(r31) + st2.rel [r31]=r20 + mov r20=0 // i must not leak kernel bits... #endif SSM_PSR_I(p0, p9, r31) ;; @@ -512,7 +523,8 @@ EX(.fail_efault, (p15) st8 [r34]=r3) .sig_pending: #ifdef CONFIG_SMP - st4.rel [r31]=r0 // release the lock + // __ticket_spin_unlock(r31) + st2.rel [r31]=r20 // release the lock #endif SSM_PSR_I(p0, p9, r17) ;; diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h index 60b15d0..b43b36b 100644 --- a/arch/m68k/include/asm/unistd.h +++ b/arch/m68k/include/asm/unistd.h @@ -340,10 +340,13 @@ #define __NR_set_thread_area 334 #define __NR_atomic_cmpxchg_32 335 #define __NR_atomic_barrier 336 +#define __NR_fanotify_init 337 +#define __NR_fanotify_mark 338 +#define __NR_prlimit64 339 #ifdef __KERNEL__ -#define NR_syscalls 337 +#define NR_syscalls 340 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S index 2391bdf..6360c43 100644 --- a/arch/m68k/kernel/entry.S +++ b/arch/m68k/kernel/entry.S @@ -765,4 +765,7 @@ sys_call_table: .long sys_set_thread_area .long sys_atomic_cmpxchg_32 /* 335 */ .long sys_atomic_barrier + .long sys_fanotify_init + .long sys_fanotify_mark + .long sys_prlimit64 diff --git a/arch/m68knommu/kernel/syscalltable.S b/arch/m68knommu/kernel/syscalltable.S index b30b3eb..79b1ed1 100644 --- a/arch/m68knommu/kernel/syscalltable.S +++ b/arch/m68knommu/kernel/syscalltable.S @@ -355,6 +355,9 @@ ENTRY(sys_call_table) .long sys_set_thread_area .long sys_atomic_cmpxchg_32 /* 335 */ .long sys_atomic_barrier + .long sys_fanotify_init + .long sys_fanotify_mark + .long sys_prlimit64 .rept NR_syscalls-(.-sys_call_table)/4 .long sys_ni_syscall diff --git a/arch/m68knommu/kernel/vmlinux.lds.S b/arch/m68knommu/kernel/vmlinux.lds.S index a91b271..ef33213 100644 --- a/arch/m68knommu/kernel/vmlinux.lds.S +++ b/arch/m68knommu/kernel/vmlinux.lds.S @@ -150,6 +150,8 @@ SECTIONS { _sdata = . ; DATA_DATA CACHELINE_ALIGNED_DATA(32) + PAGE_ALIGNED_DATA(PAGE_SIZE) + *(.data..shared_aligned) INIT_TASK_DATA(THREAD_SIZE) _edata = . 
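The fsys.S hunk above swaps the old cmpxchg-acquire lock grab in the fast sigprocmask path for an open-coded ticket-spinlock trylock, and the matching unlocks become 2-byte release stores of the next serving ticket, keeping the fsyscall path in step with the kernel's ticket-lock layout (15-bit ticket fields at bits 0 and 17 in the asm). The logic, rendered in C with the layout simplified to two 16-bit halves on a little-endian word (a sketch, not the ia64 encoding):

        #include <stdint.h>
        #include <stdbool.h>

        typedef union {
                uint32_t word;
                struct {
                        uint16_t serving;   /* ticket now being served */
                        uint16_t next;      /* next ticket to hand out */
                };
        } ticket_lock_t;

        static bool ticket_trylock(ticket_lock_t *lock)
        {
                ticket_lock_t old = *lock, new;

                if (old.serving != old.next)
                        return false;       /* held or contended: give up */

                new = old;
                new.next++;                 /* claim the next ticket */
                return __sync_bool_compare_and_swap(&lock->word,
                                                    old.word, new.word);
        }

        static void ticket_unlock(ticket_lock_t *lock)
        {
                /* the st2.rel above: release-store serving + 1 */
                __atomic_store_n(&lock->serving, lock->serving + 1,
                                 __ATOMIC_RELEASE);
        }

A trylock is enough here because the fsyscall stub can always branch to the slow path (.lock_contention) instead of spinning.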
; } > DATA diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h index 613f691..dbc5106 100644 --- a/arch/mips/include/asm/compat.h +++ b/arch/mips/include/asm/compat.h @@ -145,7 +145,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } -static inline void __user *compat_alloc_user_space(long len) +static inline void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = (struct pt_regs *) ((unsigned long) current_thread_info() + THREAD_SIZE - 32) - 1; diff --git a/arch/mn10300/kernel/mn10300-serial.c b/arch/mn10300/kernel/mn10300-serial.c index 9d49073..db509dd 100644 --- a/arch/mn10300/kernel/mn10300-serial.c +++ b/arch/mn10300/kernel/mn10300-serial.c @@ -156,17 +156,17 @@ struct mn10300_serial_port mn10300_serial_port_sif0 = { ._intr = &SC0ICR, ._rxb = &SC0RXB, ._txb = &SC0TXB, - .rx_name = "ttySM0/Rx", - .tx_name = "ttySM0/Tx", + .rx_name = "ttySM0:Rx", + .tx_name = "ttySM0:Tx", #ifdef CONFIG_MN10300_TTYSM0_TIMER8 - .tm_name = "ttySM0/Timer8", + .tm_name = "ttySM0:Timer8", ._tmxmd = &TM8MD, ._tmxbr = &TM8BR, ._tmicr = &TM8ICR, .tm_irq = TM8IRQ, .div_timer = MNSCx_DIV_TIMER_16BIT, #else /* CONFIG_MN10300_TTYSM0_TIMER2 */ - .tm_name = "ttySM0/Timer2", + .tm_name = "ttySM0:Timer2", ._tmxmd = &TM2MD, ._tmxbr = (volatile u16 *) &TM2BR, ._tmicr = &TM2ICR, @@ -209,17 +209,17 @@ struct mn10300_serial_port mn10300_serial_port_sif1 = { ._intr = &SC1ICR, ._rxb = &SC1RXB, ._txb = &SC1TXB, - .rx_name = "ttySM1/Rx", - .tx_name = "ttySM1/Tx", + .rx_name = "ttySM1:Rx", + .tx_name = "ttySM1:Tx", #ifdef CONFIG_MN10300_TTYSM1_TIMER9 - .tm_name = "ttySM1/Timer9", + .tm_name = "ttySM1:Timer9", ._tmxmd = &TM9MD, ._tmxbr = &TM9BR, ._tmicr = &TM9ICR, .tm_irq = TM9IRQ, .div_timer = MNSCx_DIV_TIMER_16BIT, #else /* CONFIG_MN10300_TTYSM1_TIMER3 */ - .tm_name = "ttySM1/Timer3", + .tm_name = "ttySM1:Timer3", ._tmxmd = &TM3MD, ._tmxbr = (volatile u16 *) &TM3BR, ._tmicr = &TM3ICR, @@ -260,9 +260,9 @@ struct mn10300_serial_port mn10300_serial_port_sif2 = { .uart.lock = __SPIN_LOCK_UNLOCKED(mn10300_serial_port_sif2.uart.lock), .name = "ttySM2", - .rx_name = "ttySM2/Rx", - .tx_name = "ttySM2/Tx", - .tm_name = "ttySM2/Timer10", + .rx_name = "ttySM2:Rx", + .tx_name = "ttySM2:Tx", + .tm_name = "ttySM2:Timer10", ._iobase = &SC2CTR, ._control = &SC2CTR, ._status = &SC2STR, diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h index 02b77ba..efa0b60 100644 --- a/arch/parisc/include/asm/compat.h +++ b/arch/parisc/include/asm/compat.h @@ -147,7 +147,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } -static __inline__ void __user *compat_alloc_user_space(long len) +static __inline__ void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = ¤t->thread.regs; return (void __user *)regs->gr[30]; diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h index 396d21a..a11d4ea 100644 --- a/arch/powerpc/include/asm/compat.h +++ b/arch/powerpc/include/asm/compat.h @@ -134,7 +134,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } -static inline void __user *compat_alloc_user_space(long len) +static inline void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = current->thread.regs; unsigned long usp = regs->gpr[1]; diff --git a/arch/powerpc/include/asm/fsldma.h b/arch/powerpc/include/asm/fsldma.h index a67aeed..debc5ed 100644 --- 
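mips, parisc and powerpc above (like ia64 earlier, and s390, sparc, tile and x86 further on) all rename compat_alloc_user_space() to arch_compat_alloc_user_space(). A rename of this shape usually means a generic wrapper is taking over the old name so that a common check runs on every architecture; a plausible sketch of that generic side (it is not part of this hunk, so the exact form is an assumption):

        #include <linux/uaccess.h>

        /* generic wrapper over the per-arch stack carve-out */
        static inline void __user *compat_alloc_user_space(unsigned long len)
        {
                void __user *sp = arch_compat_alloc_user_space(len);

                /* reject sizes that run the pointer off the user stack */
                if (unlikely(!access_ok(VERIFY_WRITE, sp, len)))
                        return NULL;

                return sp;
        }

Callers then have to handle a NULL return, instead of each architecture trusting len blindly.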
a/arch/powerpc/include/asm/fsldma.h +++ b/arch/powerpc/include/asm/fsldma.h @@ -11,6 +11,7 @@ #ifndef __ARCH_POWERPC_ASM_FSLDMA_H__ #define __ARCH_POWERPC_ASM_FSLDMA_H__ +#include #include /* diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 4d6681d..c571cd3 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -575,13 +575,19 @@ __secondary_start: /* Initialize the kernel stack. Just a repeat for iSeries. */ LOAD_REG_ADDR(r3, current_set) sldi r28,r24,3 /* get current_set[cpu#] */ - ldx r1,r3,r28 - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD - std r1,PACAKSAVE(r13) + ldx r14,r3,r28 + addi r14,r14,THREAD_SIZE-STACK_FRAME_OVERHEAD + std r14,PACAKSAVE(r13) /* Do early setup for that CPU (stab, slb, hash table pointer) */ bl .early_setup_secondary + /* + * setup the new stack pointer, but *don't* use this until + * translation is on. + */ + mr r1, r14 + /* Clear backchain so we get nice backtraces */ li r7,0 mtlr r7 diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 6bbd7a6..a7a570d 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -810,6 +810,9 @@ relocate_new_kernel: isync sync + mfspr r3, SPRN_PIR /* current core we are running on */ + mr r4, r5 /* load physical address of chunk called */ + /* jump to the entry point, usually the setup routine */ mtlr r5 blrl diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index ce53dfa..8533b3b 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -577,20 +577,11 @@ void timer_interrupt(struct pt_regs * regs) * some CPUs will continue to take decrementer exceptions */ set_dec(DECREMENTER_MAX); -#ifdef CONFIG_PPC32 +#if defined(CONFIG_PPC32) && defined(CONFIG_PMAC) if (atomic_read(&ppc_n_lost_interrupts) != 0) do_IRQ(regs); #endif - now = get_tb_or_rtc(); - if (now < decrementer->next_tb) { - /* not time for this event yet */ - now = decrementer->next_tb - now; - if (now <= DECREMENTER_MAX) - set_dec((int)now); - trace_timer_interrupt_exit(regs); - return; - } old_regs = set_irq_regs(regs); irq_enter(); @@ -606,8 +597,16 @@ void timer_interrupt(struct pt_regs * regs) get_lppaca()->int_dword.fields.decr_int = 0; #endif - if (evt->event_handler) - evt->event_handler(evt); + now = get_tb_or_rtc(); + if (now >= decrementer->next_tb) { + decrementer->next_tb = ~(u64)0; + if (evt->event_handler) + evt->event_handler(evt); + } else { + now = decrementer->next_tb - now; + if (now <= DECREMENTER_MAX) + set_dec((int)now); + } #ifdef CONFIG_PPC_ISERIES if (firmware_has_feature(FW_FEATURE_ISERIES) && hvlpevent_is_pending()) diff --git a/arch/powerpc/platforms/83xx/mpc837x_mds.c b/arch/powerpc/platforms/83xx/mpc837x_mds.c index f9751c8..8306832 100644 --- a/arch/powerpc/platforms/83xx/mpc837x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc837x_mds.c @@ -48,8 +48,10 @@ static int mpc837xmds_usb_cfg(void) return -1; np = of_find_node_by_name(NULL, "usb"); - if (!np) - return -ENODEV; + if (!np) { + ret = -ENODEV; + goto out; + } phy_type = of_get_property(np, "phy_type", NULL); if (phy_type && !strcmp(phy_type, "ulpi")) { clrbits8(bcsr_regs + 12, BCSR12_USB_SER_PIN); @@ -65,8 +67,9 @@ static int mpc837xmds_usb_cfg(void) } of_node_put(np); +out: iounmap(bcsr_regs); - return 0; + return ret; } /* ************************************************************************ diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c index da64be1..aa34cac 100644 --- 
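The timer_interrupt() rework above inverts the old control flow: instead of returning early, before irq_enter(), when the clock event is not due yet, the handler now always enters the IRQ path and decides afterwards between firing the clockevent handler and re-arming the decrementer. Distilled from the hunk, using the same names as the file:

        u64 now = get_tb_or_rtc();

        if (now >= decrementer->next_tb) {
                /* due or overdue: clear the mark, then run the handler */
                decrementer->next_tb = ~(u64)0;
                if (evt->event_handler)
                        evt->event_handler(evt);
        } else {
                /* early: re-arm with the remaining ticks, if they fit */
                now = decrementer->next_tb - now;
                if (now <= DECREMENTER_MAX)
                        set_dec((int)now);
        }

One effect is that every decrementer exception now goes through irq_enter()/irq_exit() accounting, even when it only re-arms the timer.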
a/arch/powerpc/platforms/85xx/mpc85xx_mds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c @@ -357,6 +357,7 @@ static void __init mpc85xx_mds_setup_arch(void) { #ifdef CONFIG_PCI struct pci_controller *hose; + struct device_node *np; #endif dma_addr_t max = 0xffffffff; diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c index e1467c9..34e0090 100644 --- a/arch/powerpc/platforms/85xx/p1022_ds.c +++ b/arch/powerpc/platforms/85xx/p1022_ds.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include @@ -97,7 +97,7 @@ static void __init p1022_ds_setup_arch(void) #endif #ifdef CONFIG_SWIOTLB - if (lmb_end_of_DRAM() > max) { + if (memblock_end_of_DRAM() > max) { ppc_swiotlb_enable = 1; set_pci_dma_ops(&swiotlb_dma_ops); ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb; diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 227c1c3..72d8054 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -129,20 +129,35 @@ struct device_node *dlpar_configure_connector(u32 drc_index) struct property *property; struct property *last_property = NULL; struct cc_workarea *ccwa; + char *data_buf; int cc_token; - int rc; + int rc = -1; cc_token = rtas_token("ibm,configure-connector"); if (cc_token == RTAS_UNKNOWN_SERVICE) return NULL; - spin_lock(&rtas_data_buf_lock); - ccwa = (struct cc_workarea *)&rtas_data_buf[0]; + data_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!data_buf) + return NULL; + + ccwa = (struct cc_workarea *)&data_buf[0]; ccwa->drc_index = drc_index; ccwa->zero = 0; - rc = rtas_call(cc_token, 2, 1, NULL, rtas_data_buf, NULL); - while (rc) { + do { + /* Since we release the rtas_data_buf lock between configure + * connector calls we want to re-populate the rtas_data_buffer + * with the contents of the previous call. 
+ */ + spin_lock(&rtas_data_buf_lock); + + memcpy(rtas_data_buf, data_buf, RTAS_DATA_BUF_SIZE); + rc = rtas_call(cc_token, 2, 1, NULL, rtas_data_buf, NULL); + memcpy(data_buf, rtas_data_buf, RTAS_DATA_BUF_SIZE); + + spin_unlock(&rtas_data_buf_lock); + switch (rc) { case NEXT_SIBLING: dn = dlpar_parse_cc_node(ccwa); @@ -197,18 +212,19 @@ struct device_node *dlpar_configure_connector(u32 drc_index) "returned from configure-connector\n", rc); goto cc_error; } + } while (rc); - rc = rtas_call(cc_token, 2, 1, NULL, rtas_data_buf, NULL); +cc_error: + kfree(data_buf); + + if (rc) { + if (first_dn) + dlpar_free_cc_nodes(first_dn); + + return NULL; } - spin_unlock(&rtas_data_buf_lock); return first_dn; - -cc_error: - if (first_dn) - dlpar_free_cc_nodes(first_dn); - spin_unlock(&rtas_data_buf_lock); - return NULL; } static struct device_node *derive_parent(const char *path) diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 209384b..4ae9332 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -399,6 +399,8 @@ DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1013E, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1013, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1020E, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1020, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1021E, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1021, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1022E, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P1022, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2010E, quirk_fsl_pcie_header); diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 6425abe..3017532 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -240,12 +240,13 @@ struct rio_priv { static void __iomem *rio_regs_win; +#ifdef CONFIG_E500 static int (*saved_mcheck_exception)(struct pt_regs *regs); static int fsl_rio_mcheck_exception(struct pt_regs *regs) { const struct exception_table_entry *entry = NULL; - unsigned long reason = (mfspr(SPRN_MCSR) & MCSR_MASK); + unsigned long reason = mfspr(SPRN_MCSR); if (reason & MCSR_BUS_RBERR) { reason = in_be32((u32 *)(rio_regs_win + RIO_LTLEDCSR)); @@ -269,6 +270,7 @@ static int fsl_rio_mcheck_exception(struct pt_regs *regs) else return cur_cpu_spec->machine_check(regs); } +#endif /** * fsl_rio_doorbell_send - Send a MPC85xx doorbell message @@ -1517,8 +1519,10 @@ int fsl_rio_setup(struct platform_device *dev) fsl_rio_doorbell_init(port); fsl_rio_port_write_init(port); +#ifdef CONFIG_E500 saved_mcheck_exception = ppc_md.machine_check_exception; ppc_md.machine_check_exception = fsl_rio_mcheck_exception; +#endif /* Ensure that RFXE is set */ mtspr(SPRN_HID1, (mfspr(SPRN_HID1) | 0x20000)); diff --git a/arch/powerpc/sysdev/qe_lib/qe.c b/arch/powerpc/sysdev/qe_lib/qe.c index 3da8014..90020de 100644 --- a/arch/powerpc/sysdev/qe_lib/qe.c +++ b/arch/powerpc/sysdev/qe_lib/qe.c @@ -640,6 +640,7 @@ unsigned int qe_get_num_of_snums(void) if ((num_of_snums < 28) || (num_of_snums > QE_NUM_OF_SNUM)) { /* No QE ever has fewer than 28 SNUMs */ pr_err("QE: number of snum is invalid\n"); + of_node_put(qe); return -EINVAL; } } diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index 104f200..a875c2f 100644 --- a/arch/s390/include/asm/compat.h +++ 
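dlpar_configure_connector() above stops holding rtas_data_buf_lock across the whole configure-connector conversation. Instead it keeps its own kzalloc'ed buffer and only borrows the global RTAS buffer, under the lock, for the duration of each call, copying state in before and out after. The skeleton of that discipline, condensed from the hunk:

        char *buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
        int rc;

        if (!buf)
                return NULL;

        do {
                spin_lock(&rtas_data_buf_lock);
                memcpy(rtas_data_buf, buf, RTAS_DATA_BUF_SIZE); /* restore */
                rc = rtas_call(cc_token, 2, 1, NULL, rtas_data_buf, NULL);
                memcpy(buf, rtas_data_buf, RTAS_DATA_BUF_SIZE); /* save */
                spin_unlock(&rtas_data_buf_lock);

                /* ...parse rc and build device nodes, lock not held... */
        } while (rc);

        kfree(buf);

Doing the GFP_KERNEL allocation and the node construction outside the spinlock is what makes both safe here.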
b/arch/s390/include/asm/compat.h @@ -181,7 +181,7 @@ static inline int is_compat_task(void) #endif -static inline void __user *compat_alloc_user_space(long len) +static inline void __user *arch_compat_alloc_user_space(long len) { unsigned long stack; diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h index 5016f76..6f57325 100644 --- a/arch/sparc/include/asm/compat.h +++ b/arch/sparc/include/asm/compat.h @@ -167,7 +167,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } -static inline void __user *compat_alloc_user_space(long len) +static inline void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = current_thread_info()->kregs; unsigned long usp = regs->u_regs[UREG_I6]; diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index 5079413..675c9e1 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -166,7 +166,6 @@ sparc_breakpoint (struct pt_regs *regs) { siginfo_t info; - lock_kernel(); #ifdef DEBUG_SPARC_BREAKPOINT printk ("TRAP: Entering kernel PC=%x, nPC=%x\n", regs->pc, regs->npc); #endif @@ -180,7 +179,6 @@ sparc_breakpoint (struct pt_regs *regs) #ifdef DEBUG_SPARC_BREAKPOINT printk ("TRAP: Returning to space: PC=%x nPC=%x\n", regs->pc, regs->npc); #endif - unlock_kernel(); } asmlinkage int diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c index f8514e2..12b9f35 100644 --- a/arch/sparc/kernel/unaligned_32.c +++ b/arch/sparc/kernel/unaligned_32.c @@ -323,7 +323,6 @@ asmlinkage void user_unaligned_trap(struct pt_regs *regs, unsigned int insn) { enum direction dir; - lock_kernel(); if(!(current->thread.flags & SPARC_FLAG_UNALIGNED) || (((insn >> 30) & 3) != 3)) goto kill_user; @@ -377,5 +376,5 @@ asmlinkage void user_unaligned_trap(struct pt_regs *regs, unsigned int insn) kill_user: user_mna_trap_fault(regs, insn); out: - unlock_kernel(); + ; } diff --git a/arch/sparc/kernel/windows.c b/arch/sparc/kernel/windows.c index f24d298..b351770 100644 --- a/arch/sparc/kernel/windows.c +++ b/arch/sparc/kernel/windows.c @@ -112,7 +112,6 @@ void try_to_clear_window_buffer(struct pt_regs *regs, int who) struct thread_info *tp = current_thread_info(); int window; - lock_kernel(); flush_user_windows(); for(window = 0; window < tp->w_saved; window++) { unsigned long sp = tp->rwbuf_stkptrs[window]; @@ -123,5 +122,4 @@ void try_to_clear_window_buffer(struct pt_regs *regs, int who) do_exit(SIGILL); } tp->w_saved = 0; - unlock_kernel(); } diff --git a/arch/tile/include/arch/chip_tile64.h b/arch/tile/include/arch/chip_tile64.h index 1246573..261aaba 100644 --- a/arch/tile/include/arch/chip_tile64.h +++ b/arch/tile/include/arch/chip_tile64.h @@ -150,6 +150,9 @@ /** Is the PROC_STATUS SPR supported? */ #define CHIP_HAS_PROC_STATUS_SPR() 0 +/** Is the DSTREAM_PF SPR supported? */ +#define CHIP_HAS_DSTREAM_PF() 0 + /** Log of the number of mshims we have. */ #define CHIP_LOG_NUM_MSHIMS() 2 diff --git a/arch/tile/include/arch/chip_tilepro.h b/arch/tile/include/arch/chip_tilepro.h index e864c47..7001769 100644 --- a/arch/tile/include/arch/chip_tilepro.h +++ b/arch/tile/include/arch/chip_tilepro.h @@ -150,6 +150,9 @@ /** Is the PROC_STATUS SPR supported? */ #define CHIP_HAS_PROC_STATUS_SPR() 1 +/** Is the DSTREAM_PF SPR supported? */ +#define CHIP_HAS_DSTREAM_PF() 0 + /** Log of the number of mshims we have. 
*/ #define CHIP_LOG_NUM_MSHIMS() 2 diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h index 5a34da6..8b60ec8 100644 --- a/arch/tile/include/asm/compat.h +++ b/arch/tile/include/asm/compat.h @@ -195,7 +195,7 @@ static inline unsigned long ptr_to_compat_reg(void __user *uptr) return (long)(int)(long __force)uptr; } -static inline void __user *compat_alloc_user_space(long len) +static inline void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = task_pt_regs(current); return (void __user *)regs->sp - len; @@ -214,8 +214,9 @@ extern int compat_setup_rt_frame(int sig, struct k_sigaction *ka, struct compat_sigaction; struct compat_siginfo; struct compat_sigaltstack; -long compat_sys_execve(char __user *path, compat_uptr_t __user *argv, - compat_uptr_t __user *envp); +long compat_sys_execve(const char __user *path, + const compat_uptr_t __user *argv, + const compat_uptr_t __user *envp); long compat_sys_rt_sigaction(int sig, struct compat_sigaction __user *act, struct compat_sigaction __user *oact, size_t sigsetsize); diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h index 8c95bef..ee43328 100644 --- a/arch/tile/include/asm/io.h +++ b/arch/tile/include/asm/io.h @@ -164,22 +164,22 @@ static inline void _tile_writeq(u64 val, unsigned long addr) #define iowrite32 writel #define iowrite64 writeq -static inline void *memcpy_fromio(void *dst, void *src, int len) +static inline void memcpy_fromio(void *dst, const volatile void __iomem *src, + size_t len) { int x; BUG_ON((unsigned long)src & 0x3); for (x = 0; x < len; x += 4) *(u32 *)(dst + x) = readl(src + x); - return dst; } -static inline void *memcpy_toio(void *dst, void *src, int len) +static inline void memcpy_toio(volatile void __iomem *dst, const void *src, + size_t len) { int x; BUG_ON((unsigned long)dst & 0x3); for (x = 0; x < len; x += 4) writel(*(u32 *)(src + x), dst + x); - return dst; } /* diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index d942d09..ccd5f84 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -103,6 +103,18 @@ struct thread_struct { /* Any other miscellaneous processor state bits */ unsigned long proc_status; #endif +#if !CHIP_HAS_FIXED_INTVEC_BASE() + /* Interrupt base for PL0 interrupts */ + unsigned long interrupt_vector_base; +#endif +#if CHIP_HAS_TILE_RTF_HWM() + /* Tile cache retry fifo high-water mark */ + unsigned long tile_rtf_hwm; +#endif +#if CHIP_HAS_DSTREAM_PF() + /* Data stream prefetch control */ + unsigned long dstream_pf; +#endif #ifdef CONFIG_HARDWALL /* Is this task tied to an activated hardwall? */ struct hardwall_info *hardwall; diff --git a/arch/tile/include/asm/ptrace.h b/arch/tile/include/asm/ptrace.h index acdae81..4a02bb0 100644 --- a/arch/tile/include/asm/ptrace.h +++ b/arch/tile/include/asm/ptrace.h @@ -51,10 +51,7 @@ typedef uint_reg_t pt_reg_t; /* * This struct defines the way the registers are stored on the stack during a - * system call/exception. It should be a multiple of 8 bytes to preserve - * normal stack alignment rules. - * - * Must track and + * system call or exception. "struct sigcontext" has the same shape. */ struct pt_regs { /* Saved main processor registers; 56..63 are special. 
*/ @@ -80,11 +77,6 @@ struct pt_regs { #endif /* __ASSEMBLY__ */ -/* Flag bits in pt_regs.flags */ -#define PT_FLAGS_DISABLE_IRQ 1 /* on return to kernel, disable irqs */ -#define PT_FLAGS_CALLER_SAVES 2 /* caller-save registers are valid */ -#define PT_FLAGS_RESTORE_REGS 4 /* restore callee-save regs on return */ - #define PTRACE_GETREGS 12 #define PTRACE_SETREGS 13 #define PTRACE_GETFPREGS 14 @@ -101,6 +93,11 @@ struct pt_regs { #ifdef __KERNEL__ +/* Flag bits in pt_regs.flags */ +#define PT_FLAGS_DISABLE_IRQ 1 /* on return to kernel, disable irqs */ +#define PT_FLAGS_CALLER_SAVES 2 /* caller-save registers are valid */ +#define PT_FLAGS_RESTORE_REGS 4 /* restore callee-save regs on return */ + #ifndef __ASSEMBLY__ #define instruction_pointer(regs) ((regs)->pc) diff --git a/arch/tile/include/asm/sigcontext.h b/arch/tile/include/asm/sigcontext.h index 7cd7672..5e2d033 100644 --- a/arch/tile/include/asm/sigcontext.h +++ b/arch/tile/include/asm/sigcontext.h @@ -15,13 +15,21 @@ #ifndef _ASM_TILE_SIGCONTEXT_H #define _ASM_TILE_SIGCONTEXT_H -/* NOTE: we can't include due to #include dependencies. */ -#include - -/* Must track */ +#include +/* + * struct sigcontext has the same shape as struct pt_regs, + * but is simplified since we know the fault is from userspace. + */ struct sigcontext { - struct pt_regs regs; + uint_reg_t gregs[53]; /* General-purpose registers. */ + uint_reg_t tp; /* Aliases gregs[TREG_TP]. */ + uint_reg_t sp; /* Aliases gregs[TREG_SP]. */ + uint_reg_t lr; /* Aliases gregs[TREG_LR]. */ + uint_reg_t pc; /* Program counter. */ + uint_reg_t ics; /* In Interrupt Critical Section? */ + uint_reg_t faultnum; /* Fault number. */ + uint_reg_t pad[5]; }; #endif /* _ASM_TILE_SIGCONTEXT_H */ diff --git a/arch/tile/include/asm/signal.h b/arch/tile/include/asm/signal.h index eb0253f..c1ee1d6 100644 --- a/arch/tile/include/asm/signal.h +++ b/arch/tile/include/asm/signal.h @@ -24,6 +24,7 @@ #include #if defined(__KERNEL__) && !defined(__ASSEMBLY__) +struct pt_regs; int restore_sigcontext(struct pt_regs *, struct sigcontext __user *, long *); int setup_sigcontext(struct sigcontext __user *, struct pt_regs *); void do_signal(struct pt_regs *regs); diff --git a/arch/tile/include/asm/syscalls.h b/arch/tile/include/asm/syscalls.h index af165a7..ce99ffe 100644 --- a/arch/tile/include/asm/syscalls.h +++ b/arch/tile/include/asm/syscalls.h @@ -62,10 +62,12 @@ long sys_fork(void); long _sys_fork(struct pt_regs *regs); long sys_vfork(void); long _sys_vfork(struct pt_regs *regs); -long sys_execve(char __user *filename, char __user * __user *argv, - char __user * __user *envp); -long _sys_execve(char __user *filename, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs); +long sys_execve(const char __user *filename, + const char __user *const __user *argv, + const char __user *const __user *envp); +long _sys_execve(const char __user *filename, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs); /* kernel/signal.c */ long sys_sigaltstack(const stack_t __user *, stack_t __user *); @@ -86,10 +88,13 @@ int _sys_cmpxchg_badaddr(unsigned long address, struct pt_regs *); #endif #ifdef CONFIG_COMPAT -long compat_sys_execve(char __user *path, compat_uptr_t __user *argv, - compat_uptr_t __user *envp); -long _compat_sys_execve(char __user *path, compat_uptr_t __user *argv, - compat_uptr_t __user *envp, struct pt_regs *regs); +long compat_sys_execve(const char __user *path, + const compat_uptr_t __user *argv, + const compat_uptr_t 
__user *envp); +long _compat_sys_execve(const char __user *path, + const compat_uptr_t __user *argv, + const compat_uptr_t __user *envp, + struct pt_regs *regs); long compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr, struct compat_sigaltstack __user *uoss_ptr); long _compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr, diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 985cc28..84c2911 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -408,6 +408,15 @@ static void save_arch_state(struct thread_struct *t) #if CHIP_HAS_PROC_STATUS_SPR() t->proc_status = __insn_mfspr(SPR_PROC_STATUS); #endif +#if !CHIP_HAS_FIXED_INTVEC_BASE() + t->interrupt_vector_base = __insn_mfspr(SPR_INTERRUPT_VECTOR_BASE_0); +#endif +#if CHIP_HAS_TILE_RTF_HWM() + t->tile_rtf_hwm = __insn_mfspr(SPR_TILE_RTF_HWM); +#endif +#if CHIP_HAS_DSTREAM_PF() + t->dstream_pf = __insn_mfspr(SPR_DSTREAM_PF); +#endif } static void restore_arch_state(const struct thread_struct *t) @@ -428,14 +437,14 @@ static void restore_arch_state(const struct thread_struct *t) #if CHIP_HAS_PROC_STATUS_SPR() __insn_mtspr(SPR_PROC_STATUS, t->proc_status); #endif +#if !CHIP_HAS_FIXED_INTVEC_BASE() + __insn_mtspr(SPR_INTERRUPT_VECTOR_BASE_0, t->interrupt_vector_base); +#endif #if CHIP_HAS_TILE_RTF_HWM() - /* - * Clear this whenever we switch back to a process in case - * the previous process was monkeying with it. Even if enabled - * in CBOX_MSR1 via TILE_RTF_HWM_MIN, it's still just a - * performance hint, so isn't worth a full save/restore. - */ - __insn_mtspr(SPR_TILE_RTF_HWM, 0); + __insn_mtspr(SPR_TILE_RTF_HWM, t->tile_rtf_hwm); +#endif +#if CHIP_HAS_DSTREAM_PF() + __insn_mtspr(SPR_DSTREAM_PF, t->dstream_pf); #endif } @@ -561,8 +570,9 @@ out: } #ifdef CONFIG_COMPAT -long _compat_sys_execve(char __user *path, compat_uptr_t __user *argv, - compat_uptr_t __user *envp, struct pt_regs *regs) +long _compat_sys_execve(const char __user *path, + const compat_uptr_t __user *argv, + const compat_uptr_t __user *envp, struct pt_regs *regs) { long error; char *filename; @@ -657,7 +667,7 @@ void show_regs(struct pt_regs *regs) regs->regs[51], regs->regs[52], regs->tp); pr_err(" sp : "REGFMT" lr : "REGFMT"\n", regs->sp, regs->lr); #else - for (i = 0; i < 52; i += 3) + for (i = 0; i < 52; i += 4) pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT " r%-2d: "REGFMT" r%-2d: "REGFMT"\n", i, regs->regs[i], i+1, regs->regs[i+1], diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index 45b66a3..ce183aa 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -61,13 +61,19 @@ int restore_sigcontext(struct pt_regs *regs, /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; + /* + * Enforce that sigcontext is like pt_regs, and doesn't mess + * up our stack alignment rules. 
+ */ + BUILD_BUG_ON(sizeof(struct sigcontext) != sizeof(struct pt_regs)); + BUILD_BUG_ON(sizeof(struct sigcontext) % 8 != 0); + for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i) - err |= __get_user(((long *)regs)[i], - &((long __user *)(&sc->regs))[i]); + err |= __get_user(regs->regs[i], &sc->gregs[i]); regs->faultnum = INT_SWINT_1_SIGRETURN; - err |= __get_user(*pr0, &sc->regs.regs[0]); + err |= __get_user(*pr0, &sc->gregs[0]); return err; } @@ -112,8 +118,7 @@ int setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs) int i, err = 0; for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i) - err |= __put_user(((long *)regs)[i], - &((long __user *)(&sc->regs))[i]); + err |= __put_user(regs->regs[i], &sc->gregs[i]); return err; } @@ -203,19 +208,17 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, * Set up registers for signal handler. * Registers that we don't modify keep the value they had from * user-space at the time we took the signal. + * We always pass siginfo and mcontext, regardless of SA_SIGINFO, + * since some things rely on this (e.g. glibc's debug/segfault.c). */ regs->pc = (unsigned long) ka->sa.sa_handler; regs->ex1 = PL_ICS_EX1(USER_PL, 1); /* set crit sec in handler */ regs->sp = (unsigned long) frame; regs->lr = restorer; regs->regs[0] = (unsigned long) usig; - - if (ka->sa.sa_flags & SA_SIGINFO) { - /* Need extra arguments, so mark to restore caller-saves. */ - regs->regs[1] = (unsigned long) &frame->info; - regs->regs[2] = (unsigned long) &frame->uc; - regs->flags |= PT_FLAGS_CALLER_SAVES; - } + regs->regs[1] = (unsigned long) &frame->info; + regs->regs[2] = (unsigned long) &frame->uc; + regs->flags |= PT_FLAGS_CALLER_SAVES; /* * Notify any tracer that was single-stepping it. diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index 38a68b0b4..ea2e0ce 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -175,7 +175,7 @@ static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt) pr_err(" \n", frame->info.si_signo); } - return &frame->uc.uc_mcontext.regs; + return (struct pt_regs *)&frame->uc.uc_mcontext; } return NULL; } diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 8aa1b59..e8c8881 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -74,7 +74,7 @@ endif ifdef CONFIG_CC_STACKPROTECTOR cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh - ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(biarch)),y) + ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(KBUILD_CPPFLAGS) $(biarch)),y) stackp-y := -fstack-protector KBUILD_CFLAGS += $(stackp-y) else diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index b86feab..518bb99 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -50,7 +50,12 @@ /* * Reload arg registers from stack in case ptrace changed them. * We don't reload %eax because syscall_trace_enter() returned - * the value it wants us to use in the table lookup. + * the %rax value we should see. Instead, we just truncate that + * value to 32 bits again as we did on entry from user mode. + * If it's a new value set by user_regset during entry tracing, + * this matches the normal truncation of the user-mode value. + * If it's -1 to make us punt the syscall, then (u32)-1 is still + * an appropriately invalid value. 
*/ .macro LOAD_ARGS32 offset, _r9=0 .if \_r9 @@ -60,6 +65,7 @@ movl \offset+48(%rsp),%edx movl \offset+56(%rsp),%esi movl \offset+64(%rsp),%edi + movl %eax,%eax /* zero extension */ .endm .macro CFI_STARTPROC32 simple @@ -153,7 +159,7 @@ ENTRY(ia32_sysenter_target) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) CFI_REMEMBER_STATE jnz sysenter_tracesys - cmpl $(IA32_NR_syscalls-1),%eax + cmpq $(IA32_NR_syscalls-1),%rax ja ia32_badsys sysenter_do_call: IA32_ARG_FIXUP @@ -195,7 +201,7 @@ sysexit_from_sys_call: movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */ call audit_syscall_entry movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */ - cmpl $(IA32_NR_syscalls-1),%eax + cmpq $(IA32_NR_syscalls-1),%rax ja ia32_badsys movl %ebx,%edi /* reload 1st syscall arg */ movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */ @@ -248,7 +254,7 @@ sysenter_tracesys: call syscall_trace_enter LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST - cmpl $(IA32_NR_syscalls-1),%eax + cmpq $(IA32_NR_syscalls-1),%rax ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ jmp sysenter_do_call CFI_ENDPROC @@ -314,7 +320,7 @@ ENTRY(ia32_cstar_target) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) CFI_REMEMBER_STATE jnz cstar_tracesys - cmpl $IA32_NR_syscalls-1,%eax + cmpq $IA32_NR_syscalls-1,%rax ja ia32_badsys cstar_do_call: IA32_ARG_FIXUP 1 @@ -367,7 +373,7 @@ cstar_tracesys: LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */ RESTORE_REST xchgl %ebp,%r9d - cmpl $(IA32_NR_syscalls-1),%eax + cmpq $(IA32_NR_syscalls-1),%rax ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ jmp cstar_do_call END(ia32_cstar_target) @@ -425,7 +431,7 @@ ENTRY(ia32_syscall) orl $TS_COMPAT,TI_status(%r10) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) jnz ia32_tracesys - cmpl $(IA32_NR_syscalls-1),%eax + cmpq $(IA32_NR_syscalls-1),%rax ja ia32_badsys ia32_do_call: IA32_ARG_FIXUP @@ -444,7 +450,7 @@ ia32_tracesys: call syscall_trace_enter LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST - cmpl $(IA32_NR_syscalls-1),%eax + cmpq $(IA32_NR_syscalls-1),%rax ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ jmp ia32_do_call END(ia32_syscall) diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 306160e..1d9cd27 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -205,7 +205,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } -static inline void __user *compat_alloc_user_space(long len) +static inline void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = task_pt_regs(current); return (void __user *)regs->sp - len; diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 781a50b..c6fbb7b 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -296,6 +296,7 @@ extern const char * const x86_power_flags[32]; #endif /* CONFIG_X86_64 */ +#if __GNUC__ >= 4 /* * Static testing of CPU features. Used the same as boot_cpu_has(). 
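The ia32entry.S changes above work in pairs: LOAD_ARGS32 now re-truncates %eax with movl %eax,%eax after a tracer may have rewritten the saved registers, and every bounds check is widened from cmpl to cmpq so the full 64-bit %rax is what gets compared against the table size. The invariant, as an illustrative C helper (not the kernel's code):

        #include <linux/types.h>

        static long ia32_syscall_nr(u64 rax)
        {
                rax = (u32)rax;                 /* movl %eax,%eax: zero-extend */
                if (rax > IA32_NR_syscalls - 1) /* cmpq: full-width check */
                        return -ENOSYS;         /* the ia32_badsys path */
                return (long)rax;
        }

Both changes close the same hole from different ends: a ptrace-poked %rax with set high bits must neither pass the bounds check nor be used as the syscall-table index.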
* These are only valid after alternatives have run, but will statically @@ -304,7 +305,7 @@ extern const char * const x86_power_flags[32]; */ static __always_inline __pure bool __static_cpu_has(u16 bit) { -#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5) +#if __GNUC__ > 4 || __GNUC_MINOR__ >= 5 asm goto("1: jmp %l[t_no]\n" "2:\n" ".section .altinstructions,\"a\"\n" @@ -345,7 +346,6 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) #endif } -#if __GNUC__ >= 4 #define static_cpu_has(bit) \ ( \ __builtin_constant_p(boot_cpu_has(bit)) ? \ diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 004e6e2..1d5c08a 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -68,7 +68,6 @@ extern unsigned long force_hpet_address; extern u8 hpet_blockid; extern int hpet_force_user; extern u8 hpet_msi_disable; -extern u8 hpet_readback_cmp; extern int is_hpet_enabled(void); extern int hpet_enable(void); extern void hpet_disable(void); diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h index f35eb45..c4191b3 100644 --- a/arch/x86/include/asm/iomap.h +++ b/arch/x86/include/asm/iomap.h @@ -26,11 +26,11 @@ #include #include -void * +void __iomem * iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); void -iounmap_atomic(void *kvaddr, enum km_type type); +iounmap_atomic(void __iomem *kvaddr, enum km_type type); int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot); diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 51cfd73..1f99ecf 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -152,9 +152,14 @@ struct x86_emulate_ops { struct operand { enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type; unsigned int bytes; - unsigned long orig_val, *ptr; + union { + unsigned long orig_val; + u64 orig_val64; + }; + unsigned long *ptr; union { unsigned long val; + u64 val64; char valptr[sizeof(unsigned long) + 2]; }; }; diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 404a880..d395540 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -27,6 +27,9 @@ extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node); extern struct pci_bus *pci_scan_bus_with_sysdata(int busno); +#ifdef CONFIG_PCI + +#ifdef CONFIG_PCI_DOMAINS static inline int pci_domain_nr(struct pci_bus *bus) { struct pci_sysdata *sd = bus->sysdata; @@ -37,13 +40,12 @@ static inline int pci_proc_domain(struct pci_bus *bus) { return pci_domain_nr(bus); } - +#endif /* Can be used to override the logic in pci_scan_bus for skipping already-configured bus numbers - to be used for buggy BIOSes or architectures with incomplete PCI setup by the loader */ -#ifdef CONFIG_PCI extern unsigned int pcibios_assign_all_busses(void); extern int pci_legacy_init(void); # ifdef CONFIG_ACPI diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 7b598b8..f744f54 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -698,9 +698,11 @@ void __init uv_system_init(void) for (j = 0; j < 64; j++) { if (!test_bit(j, &present)) continue; - uv_blade_info[blade].pnode = (i * 64 + j); + pnode = (i * 64 + j); + uv_blade_info[blade].pnode = pnode; uv_blade_info[blade].nr_possible_cpus = 0; uv_blade_info[blade].nr_online_cpus = 0; + max_pnode = max(pnode, max_pnode); blade++; } } @@ -738,7 +740,6 @@ void __init uv_system_init(void) 
uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid); uv_node_to_blade[nid] = blade; uv_cpu_to_blade[cpu] = blade; - max_pnode = max(pnode, max_pnode); } /* Add blade/pnode info for nodes without cpus */ @@ -750,7 +751,6 @@ void __init uv_system_init(void) pnode = (paddr >> m_val) & pnode_mask; blade = boot_pnode_to_blade(pnode); uv_node_to_blade[nid] = blade; - max_pnode = max(pnode, max_pnode); } map_gru_high(max_pnode); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 224392d..5e97529 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -530,7 +530,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) err = -ENOMEM; goto out; } - if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) { + if (!zalloc_cpumask_var(&b->cpus, GFP_KERNEL)) { kfree(b); err = -ENOMEM; goto out; @@ -543,7 +543,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) #ifndef CONFIG_SMP cpumask_setall(b->cpus); #else - cpumask_copy(b->cpus, c->llc_shared_map); + cpumask_set_cpu(cpu, b->cpus); #endif per_cpu(threshold_banks, cpu)[bank] = b; diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index c2a8b26..d9368ee 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -202,10 +202,11 @@ static int therm_throt_process(bool new_event, int event, int level) #ifdef CONFIG_SYSFS /* Add/Remove thermal_throttle interface for CPU device: */ -static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) +static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev, + unsigned int cpu) { int err; - struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); + struct cpuinfo_x86 *c = &cpu_data(cpu); err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group); if (err) @@ -251,7 +252,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb, case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: mutex_lock(&therm_cpu_lock); - err = thermal_throttle_add_dev(sys_dev); + err = thermal_throttle_add_dev(sys_dev, cpu); mutex_unlock(&therm_cpu_lock); WARN_ON(err); break; @@ -287,7 +288,7 @@ static __init int thermal_throttle_init_device(void) #endif /* connect live CPUs to sysfs */ for_each_online_cpu(cpu) { - err = thermal_throttle_add_dev(get_cpu_sysdev(cpu)); + err = thermal_throttle_add_dev(get_cpu_sysdev(cpu), cpu); WARN_ON(err); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index f2da20f..3efdf28 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1154,7 +1154,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) /* * event overflow */ - handled = 1; + handled++; data.period = event->hw.last_period; if (!x86_perf_event_set_period(event)) @@ -1200,12 +1200,20 @@ void perf_events_lapic_init(void) apic_write(APIC_LVTPC, APIC_DM_NMI); } +struct pmu_nmi_state { + unsigned int marked; + int handled; +}; + +static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi); + static int __kprobes perf_event_nmi_handler(struct notifier_block *self, unsigned long cmd, void *__args) { struct die_args *args = __args; - struct pt_regs *regs; + unsigned int this_nmi; + int handled; if (!atomic_read(&active_events)) return NOTIFY_DONE; @@ -1214,22 +1222,47 @@ perf_event_nmi_handler(struct notifier_block *self, case DIE_NMI: case DIE_NMI_IPI: break; - + case DIE_NMIUNKNOWN: + this_nmi = 
percpu_read(irq_stat.__nmi_count); + if (this_nmi != __get_cpu_var(pmu_nmi).marked) + /* let the kernel handle the unknown nmi */ + return NOTIFY_DONE; + /* + * This one is a PMU back-to-back nmi. Two events + * trigger 'simultaneously' raising two back-to-back + * NMIs. If the first NMI handles both, the latter + * will be empty and daze the CPU. So, we drop it to + * avoid false-positive 'unknown nmi' messages. + */ + return NOTIFY_STOP; default: return NOTIFY_DONE; } - regs = args->regs; - apic_write(APIC_LVTPC, APIC_DM_NMI); - /* - * Can't rely on the handled return value to say it was our NMI, two - * events could trigger 'simultaneously' raising two back-to-back NMIs. - * - * If the first NMI handles both, the latter will be empty and daze - * the CPU. - */ - x86_pmu.handle_irq(regs); + + handled = x86_pmu.handle_irq(args->regs); + if (!handled) + return NOTIFY_DONE; + + this_nmi = percpu_read(irq_stat.__nmi_count); + if ((handled > 1) || + /* the next nmi could be a back-to-back nmi */ + ((__get_cpu_var(pmu_nmi).marked == this_nmi) && + (__get_cpu_var(pmu_nmi).handled > 1))) { + /* + * We could have two subsequent back-to-back nmis: The + * first handles more than one counter, the 2nd + * handles only one counter and the 3rd handles no + * counter. + * + * This is the 2nd nmi because the previous was + * handling more than one counter. We will mark the + * next (3rd) and then drop it if unhandled. + */ + __get_cpu_var(pmu_nmi).marked = this_nmi + 1; + __get_cpu_var(pmu_nmi).handled = handled; + } return NOTIFY_STOP; } diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index d8d86d0..ee05c90 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -712,7 +712,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) struct perf_sample_data data; struct cpu_hw_events *cpuc; int bit, loops; - u64 ack, status; + u64 status; + int handled = 0; perf_sample_data_init(&data, 0); @@ -728,6 +729,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) loops = 0; again: + intel_pmu_ack_status(status); if (++loops > 100) { WARN_ONCE(1, "perfevents: irq loop stuck!\n"); perf_event_print_debug(); @@ -736,19 +738,22 @@ again: } inc_irq_stat(apic_perf_irqs); - ack = status; intel_pmu_lbr_read(); /* * PEBS overflow sets bit 62 in the global status register */ - if (__test_and_clear_bit(62, (unsigned long *)&status)) + if (__test_and_clear_bit(62, (unsigned long *)&status)) { + handled++; x86_pmu.drain_pebs(regs); + } for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { struct perf_event *event = cpuc->events[bit]; + handled++; + if (!test_bit(bit, cpuc->active_mask)) continue; @@ -761,8 +766,6 @@ again: x86_pmu_stop(event); } - intel_pmu_ack_status(ack); - /* * Repeat if there is more work to be done: */ @@ -772,7 +775,7 @@ again: done: intel_pmu_enable_all(0); - return 1; + return handled; } static struct event_constraint * diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 7e578e9..b560db3 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -692,7 +692,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) inc_irq_stat(apic_perf_irqs); } - return handled > 0; + return handled; } /* diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index e5cc7e8..ebdb85c 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -18,7 +18,6 @@ #include #include #include -#include 
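The perf_event_nmi_handler() rework above lets the PMU claim 'unknown' NMIs that are really the empty second half of a back-to-back pair: whenever a handler pass consumes more than one counter overflow, the per-CPU NMI count that the spurious follow-up would carry is recorded, and a later DIE_NMIUNKNOWN with a matching count is swallowed. Stripped of the x86 notifier plumbing (per-CPU state shown as plain variables in this sketch):

        struct pmu_nmi_state { unsigned int marked; int handled; };
        static struct pmu_nmi_state pmu_nmi;    /* really DEFINE_PER_CPU */

        /* DIE_NMI path: returns nonzero if the PMU claimed this NMI */
        static int pmu_nmi_handle(unsigned int this_nmi, int handled)
        {
                if (!handled)
                        return 0;
                if (handled > 1 ||
                    (pmu_nmi.marked == this_nmi && pmu_nmi.handled > 1)) {
                        /* a contentless back-to-back NMI may follow */
                        pmu_nmi.marked = this_nmi + 1;
                        pmu_nmi.handled = handled;
                }
                return 1;
        }

        /* DIE_NMIUNKNOWN path: swallow our own pre-marked empty NMI */
        static int pmu_nmi_is_ours(unsigned int this_nmi)
        {
                return this_nmi == pmu_nmi.marked;
        }

This depends on the companion change to intel_pmu_handle_irq() above, which now returns how many events it handled rather than a bare 0/1.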
static void __init fix_hypertransport_config(int num, int slot, int func) { @@ -192,21 +191,6 @@ static void __init ati_bugs_contd(int num, int slot, int func) } #endif -/* - * Force the read back of the CMP register in hpet_next_event() - * to work around the problem that the CMP register write seems to be - * delayed. See hpet_next_event() for details. - * - * We do this on all SMBUS incarnations for now until we have more - * information about the affected chipsets. - */ -static void __init ati_hpet_bugs(int num, int slot, int func) -{ -#ifdef CONFIG_HPET_TIMER - hpet_readback_cmp = 1; -#endif -} - #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) @@ -236,8 +220,6 @@ static struct chipset early_qrk[] __initdata = { PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, - { PCI_VENDOR_ID_ATI, PCI_ANY_ID, - PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_hpet_bugs }, {} }; diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 351f9c0..410fdb3 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -35,7 +35,6 @@ unsigned long hpet_address; u8 hpet_blockid; /* OS timer block num */ u8 hpet_msi_disable; -u8 hpet_readback_cmp; #ifdef CONFIG_PCI_MSI static unsigned long hpet_num_timers; @@ -395,23 +394,27 @@ static int hpet_next_event(unsigned long delta, * at that point and we would wait for the next hpet interrupt * forever. We found out that reading the CMP register back * forces the transfer so we can rely on the comparison with - * the counter register below. + * the counter register below. If the read back from the + * compare register does not match the value we programmed + * then we might have a real hardware problem. We cannot do + * much about it here, but at least alert the user/admin with + * a prominent warning. - * That works fine on those ATI chipsets, but on newer Intel - * chipsets (ICH9...) this triggers due to an erratum: Reading - * the comparator immediately following a write is returning - * the old value. + * An erratum on some chipsets (ICH9, ...) results in the + * comparator read immediately following a write returning the + * old value. The workaround is to read the register a second + * time if the first read returned the old value. - * We restrict the read back to the affected ATI chipsets (set - * by quirks) and also run it with hpet=verbose for debugging - * purposes. + * In fact the write to the comparator register is delayed up + * to two HPET cycles, so our earlier workaround of restricting + * the readback to the ATI chipsets known to be borked + * failed miserably. So we give up on optimizations forever + * and penalize all HPET incarnations unconditionally. */ - if (hpet_readback_cmp || hpet_verbose) { - u32 cmp = hpet_readl(HPET_Tn_CMP(timer)); - - if (cmp != cnt) + if (unlikely((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt)) { + if (hpet_readl(HPET_Tn_CMP(timer)) != cnt) printk_once(KERN_WARNING - "hpet: compare register read back failed.\n"); + "hpet: compare register read back failed.\n"); } return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ?
-ETIME : 0; diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index a874495..e2a5952 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c @@ -45,8 +45,7 @@ void __init setup_trampoline_page_table(void) /* Copy kernel address range */ clone_pgd_range(trampoline_pg_dir + KERNEL_PGD_BOUNDARY, swapper_pg_dir + KERNEL_PGD_BOUNDARY, - min_t(unsigned long, KERNEL_PGD_PTRS, - KERNEL_PGD_BOUNDARY)); + KERNEL_PGD_PTRS); /* Initialize low mappings */ clone_pgd_range(trampoline_pg_dir, diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index d632934..26a863a 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -655,7 +655,7 @@ void restore_sched_clock_state(void) local_irq_save(flags); - get_cpu_var(cyc2ns_offset) = 0; + __get_cpu_var(cyc2ns_offset) = 0; offset = cyc2ns_suspend - sched_clock(); for_each_possible_cpu(cpu) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index b38bd8b..66ca98a 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1870,17 +1870,16 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { struct decode_cache *c = &ctxt->decode; - u64 old = c->dst.orig_val; + u64 old = c->dst.orig_val64; if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) { - c->regs[VCPU_REGS_RAX] = (u32) (old >> 0); c->regs[VCPU_REGS_RDX] = (u32) (old >> 32); ctxt->eflags &= ~EFLG_ZF; } else { - c->dst.val = ((u64)c->regs[VCPU_REGS_RCX] << 32) | - (u32) c->regs[VCPU_REGS_RBX]; + c->dst.val64 = ((u64)c->regs[VCPU_REGS_RCX] << 32) | + (u32) c->regs[VCPU_REGS_RBX]; ctxt->eflags |= EFLG_ZF; } @@ -2616,7 +2615,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) c->src.valptr, c->src.bytes); if (rc != X86EMUL_CONTINUE) goto done; - c->src.orig_val = c->src.val; + c->src.orig_val64 = c->src.val64; } if (c->src2.type == OP_MEM) { diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 8d10c06..4b7b73c 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -64,6 +64,9 @@ static void pic_unlock(struct kvm_pic *s) if (!found) found = s->kvm->bsp_vcpu; + if (!found) + return; + kvm_vcpu_kick(found); } } diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index ffed068..63c3145 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -43,7 +43,6 @@ struct kvm_kpic_state { u8 irr; /* interrupt request register */ u8 imr; /* interrupt mask register */ u8 isr; /* interrupt service register */ - u8 isr_ack; /* interrupt ack detection */ u8 priority_add; /* highest irq priority */ u8 irq_base; u8 read_reg_select; @@ -56,6 +55,7 @@ struct kvm_kpic_state { u8 init4; /* true if 4 byte init */ u8 elcr; /* PIIX edge/trigger selection */ u8 elcr_mask; + u8 isr_ack; /* interrupt ack detection */ struct kvm_pic *pics_state; }; diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 84e236c..72fc70c 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -74,7 +74,7 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) /* * Map 'pfn' using fixed map 'type' and protections 'prot' */ -void * +void __iomem * iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) { /* @@ -86,12 +86,12 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC)) prot = PAGE_KERNEL_UC_MINUS; - return kmap_atomic_prot_pfn(pfn, type, prot); + return (void __force 
__iomem *) kmap_atomic_prot_pfn(pfn, type, prot); } EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); void -iounmap_atomic(void *kvaddr, enum km_type type) +iounmap_atomic(void __iomem *kvaddr, enum km_type type) { unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index f6b48f6..cfe4faa 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -568,8 +568,13 @@ static int __init init_sysfs(void) int error; error = sysdev_class_register(&oprofile_sysclass); - if (!error) - error = sysdev_register(&device_oprofile); + if (error) + return error; + + error = sysdev_register(&device_oprofile); + if (error) + sysdev_class_unregister(&oprofile_sysclass); + return error; } @@ -580,8 +585,10 @@ static void exit_sysfs(void) } #else -#define init_sysfs() do { } while (0) -#define exit_sysfs() do { } while (0) + +static inline int init_sysfs(void) { return 0; } +static inline void exit_sysfs(void) { } + #endif /* CONFIG_PM */ static int __init p4_init(char **cpu_type) @@ -695,6 +702,8 @@ int __init op_nmi_init(struct oprofile_operations *ops) char *cpu_type = NULL; int ret = 0; + using_nmi = 0; + if (!cpu_has_apic) return -ENODEV; @@ -774,7 +783,10 @@ int __init op_nmi_init(struct oprofile_operations *ops) mux_init(ops); - init_sysfs(); + ret = init_sysfs(); + if (ret) + return ret; + using_nmi = 1; printk(KERN_INFO "oprofile: using NMI interrupt.\n"); return 0; diff --git a/block/Kconfig b/block/Kconfig index 9be0b56..6c9213e 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -77,6 +77,18 @@ config BLK_DEV_INTEGRITY T10/SCSI Data Integrity Field or the T13/ATA External Path Protection. If in doubt, say N. +config BLK_DEV_THROTTLING + bool "Block layer bio throttling support" + depends on BLK_CGROUP=y && EXPERIMENTAL + default n + ---help--- + Block layer bio throttling support. It can be used to limit + the IO rate to a device. IO rate policies are per cgroup and + one needs to mount and use blkio cgroup controller for creating + cgroups and specifying per device IO rate policies. + + See Documentation/cgroups/blkio-controller.txt for more information. 
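As a quick illustration of the user-visible interface this option enables (a hedged sketch, not part of the patch: the cgroup mount point, the device numbers, and the "blkio." file-name prefix are assumptions of this example), rules are written one per line in "major:minor value" form, the format the blkio_policy_parse_and_set() helper added below accepts, and writing a value of 0 deletes the rule again:

	/* Hypothetical user-space sketch: cap reads on device 8:16 at 1 MiB/s. */
	#include <stdio.h>

	int main(void)
	{
		/* Assumes the blkio controller is mounted at /cgroup/blkio. */
		FILE *f = fopen("/cgroup/blkio/blkio.throttle.read_bps_device", "w");

		if (!f)
			return 1;
		/* "major:minor bytes_per_second"; writing "8:16 0" would delete the rule. */
		fprintf(f, "8:16 1048576\n");
		return fclose(f) ? 1 : 0;
	}
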
+ endif # BLOCK config BLOCK_COMPAT diff --git a/block/Makefile b/block/Makefile index 0bb499a..0fec4b3 100644 --- a/block/Makefile +++ b/block/Makefile @@ -3,12 +3,13 @@ # obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ - blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ + blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o obj-$(CONFIG_BLK_DEV_BSG) += bsg.o obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o +obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o diff --git a/block/blk-barrier.c b/block/blk-barrier.c deleted file mode 100644 index f0faefc..0000000 --- a/block/blk-barrier.c +++ /dev/null @@ -1,350 +0,0 @@ -/* - * Functions related to barrier IO handling - */ -#include -#include -#include -#include -#include - -#include "blk.h" - -/** - * blk_queue_ordered - does this queue support ordered writes - * @q: the request queue - * @ordered: one of QUEUE_ORDERED_* - * - * Description: - * For journalled file systems, doing ordered writes on a commit - * block instead of explicitly doing wait_on_buffer (which is bad - * for performance) can be a big win. Block drivers supporting this - * feature should call this function and indicate so. - * - **/ -int blk_queue_ordered(struct request_queue *q, unsigned ordered) -{ - if (ordered != QUEUE_ORDERED_NONE && - ordered != QUEUE_ORDERED_DRAIN && - ordered != QUEUE_ORDERED_DRAIN_FLUSH && - ordered != QUEUE_ORDERED_DRAIN_FUA && - ordered != QUEUE_ORDERED_TAG && - ordered != QUEUE_ORDERED_TAG_FLUSH && - ordered != QUEUE_ORDERED_TAG_FUA) { - printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); - return -EINVAL; - } - - q->ordered = ordered; - q->next_ordered = ordered; - - return 0; -} -EXPORT_SYMBOL(blk_queue_ordered); - -/* - * Cache flushing for ordered writes handling - */ -unsigned blk_ordered_cur_seq(struct request_queue *q) -{ - if (!q->ordseq) - return 0; - return 1 << ffz(q->ordseq); -} - -unsigned blk_ordered_req_seq(struct request *rq) -{ - struct request_queue *q = rq->q; - - BUG_ON(q->ordseq == 0); - - if (rq == &q->pre_flush_rq) - return QUEUE_ORDSEQ_PREFLUSH; - if (rq == &q->bar_rq) - return QUEUE_ORDSEQ_BAR; - if (rq == &q->post_flush_rq) - return QUEUE_ORDSEQ_POSTFLUSH; - - /* - * !fs requests don't need to follow barrier ordering. Always - * put them at the front. This fixes the following deadlock. - * - * http://thread.gmane.org/gmane.linux.kernel/537473 - */ - if (rq->cmd_type != REQ_TYPE_FS) - return QUEUE_ORDSEQ_DRAIN; - - if ((rq->cmd_flags & REQ_ORDERED_COLOR) == - (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR)) - return QUEUE_ORDSEQ_DRAIN; - else - return QUEUE_ORDSEQ_DONE; -} - -bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) -{ - struct request *rq; - - if (error && !q->orderr) - q->orderr = error; - - BUG_ON(q->ordseq & seq); - q->ordseq |= seq; - - if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) - return false; - - /* - * Okay, sequence complete. 
- */ - q->ordseq = 0; - rq = q->orig_bar_rq; - __blk_end_request_all(rq, q->orderr); - return true; -} - -static void pre_flush_end_io(struct request *rq, int error) -{ - elv_completed_request(rq->q, rq); - blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); -} - -static void bar_end_io(struct request *rq, int error) -{ - elv_completed_request(rq->q, rq); - blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); -} - -static void post_flush_end_io(struct request *rq, int error) -{ - elv_completed_request(rq->q, rq); - blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); -} - -static void queue_flush(struct request_queue *q, unsigned which) -{ - struct request *rq; - rq_end_io_fn *end_io; - - if (which == QUEUE_ORDERED_DO_PREFLUSH) { - rq = &q->pre_flush_rq; - end_io = pre_flush_end_io; - } else { - rq = &q->post_flush_rq; - end_io = post_flush_end_io; - } - - blk_rq_init(q, rq); - rq->cmd_type = REQ_TYPE_FS; - rq->cmd_flags = REQ_HARDBARRIER | REQ_FLUSH; - rq->rq_disk = q->orig_bar_rq->rq_disk; - rq->end_io = end_io; - - elv_insert(q, rq, ELEVATOR_INSERT_FRONT); -} - -static inline bool start_ordered(struct request_queue *q, struct request **rqp) -{ - struct request *rq = *rqp; - unsigned skip = 0; - - q->orderr = 0; - q->ordered = q->next_ordered; - q->ordseq |= QUEUE_ORDSEQ_STARTED; - - /* - * For an empty barrier, there's no actual BAR request, which - * in turn makes POSTFLUSH unnecessary. Mask them off. - */ - if (!blk_rq_sectors(rq)) { - q->ordered &= ~(QUEUE_ORDERED_DO_BAR | - QUEUE_ORDERED_DO_POSTFLUSH); - /* - * Empty barrier on a write-through device w/ ordered - * tag has no command to issue and without any command - * to issue, ordering by tag can't be used. Drain - * instead. - */ - if ((q->ordered & QUEUE_ORDERED_BY_TAG) && - !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) { - q->ordered &= ~QUEUE_ORDERED_BY_TAG; - q->ordered |= QUEUE_ORDERED_BY_DRAIN; - } - } - - /* stash away the original request */ - blk_dequeue_request(rq); - q->orig_bar_rq = rq; - rq = NULL; - - /* - * Queue ordered sequence. As we stack them at the head, we - * need to queue in reverse order. Note that we rely on that - * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs - * request gets inbetween ordered sequence. - */ - if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) { - queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH); - rq = &q->post_flush_rq; - } else - skip |= QUEUE_ORDSEQ_POSTFLUSH; - - if (q->ordered & QUEUE_ORDERED_DO_BAR) { - rq = &q->bar_rq; - - /* initialize proxy request and queue it */ - blk_rq_init(q, rq); - if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) - rq->cmd_flags |= REQ_WRITE; - if (q->ordered & QUEUE_ORDERED_DO_FUA) - rq->cmd_flags |= REQ_FUA; - init_request_from_bio(rq, q->orig_bar_rq->bio); - rq->end_io = bar_end_io; - - elv_insert(q, rq, ELEVATOR_INSERT_FRONT); - } else - skip |= QUEUE_ORDSEQ_BAR; - - if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) { - queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH); - rq = &q->pre_flush_rq; - } else - skip |= QUEUE_ORDSEQ_PREFLUSH; - - if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q)) - rq = NULL; - else - skip |= QUEUE_ORDSEQ_DRAIN; - - *rqp = rq; - - /* - * Complete skipped sequences. If whole sequence is complete, - * return false to tell elevator that this request is gone. 
- */ - return !blk_ordered_complete_seq(q, skip, 0); -} - -bool blk_do_ordered(struct request_queue *q, struct request **rqp) -{ - struct request *rq = *rqp; - const int is_barrier = rq->cmd_type == REQ_TYPE_FS && - (rq->cmd_flags & REQ_HARDBARRIER); - - if (!q->ordseq) { - if (!is_barrier) - return true; - - if (q->next_ordered != QUEUE_ORDERED_NONE) - return start_ordered(q, rqp); - else { - /* - * Queue ordering not supported. Terminate - * with prejudice. - */ - blk_dequeue_request(rq); - __blk_end_request_all(rq, -EOPNOTSUPP); - *rqp = NULL; - return false; - } - } - - /* - * Ordered sequence in progress - */ - - /* Special requests are not subject to ordering rules. */ - if (rq->cmd_type != REQ_TYPE_FS && - rq != &q->pre_flush_rq && rq != &q->post_flush_rq) - return true; - - if (q->ordered & QUEUE_ORDERED_BY_TAG) { - /* Ordered by tag. Blocking the next barrier is enough. */ - if (is_barrier && rq != &q->bar_rq) - *rqp = NULL; - } else { - /* Ordered by draining. Wait for turn. */ - WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); - if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) - *rqp = NULL; - } - - return true; -} - -static void bio_end_empty_barrier(struct bio *bio, int err) -{ - if (err) { - if (err == -EOPNOTSUPP) - set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); - clear_bit(BIO_UPTODATE, &bio->bi_flags); - } - if (bio->bi_private) - complete(bio->bi_private); - bio_put(bio); -} - -/** - * blkdev_issue_flush - queue a flush - * @bdev: blockdev to issue flush for - * @gfp_mask: memory allocation flags (for bio_alloc) - * @error_sector: error sector - * @flags: BLKDEV_IFL_* flags to control behaviour - * - * Description: - * Issue a flush for the block device in question. Caller can supply - * room for storing the error offset in case of a flush error, if they - * wish to. If WAIT flag is not passed then caller may check only what - * request was pushed in some internal queue for later handling. - */ -int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, - sector_t *error_sector, unsigned long flags) -{ - DECLARE_COMPLETION_ONSTACK(wait); - struct request_queue *q; - struct bio *bio; - int ret = 0; - - if (bdev->bd_disk == NULL) - return -ENXIO; - - q = bdev_get_queue(bdev); - if (!q) - return -ENXIO; - - /* - * some block devices may not have their queue correctly set up here - * (e.g. loop device without a backing file) and so issuing a flush - * here will panic. Ensure there is a request function before issuing - * the barrier. - */ - if (!q->make_request_fn) - return -ENXIO; - - bio = bio_alloc(gfp_mask, 0); - bio->bi_end_io = bio_end_empty_barrier; - bio->bi_bdev = bdev; - if (test_bit(BLKDEV_WAIT, &flags)) - bio->bi_private = &wait; - - bio_get(bio); - submit_bio(WRITE_BARRIER, bio); - if (test_bit(BLKDEV_WAIT, &flags)) { - wait_for_completion(&wait); - /* - * The driver must store the error location in ->bi_sector, if - * it supports it. For non-stacked drivers, this should be - * copied from blk_rq_pos(rq). 
- */ - if (error_sector) - *error_sector = bio->bi_sector; - } - - if (bio_flagged(bio, BIO_EOPNOTSUPP)) - ret = -EOPNOTSUPP; - else if (!bio_flagged(bio, BIO_UPTODATE)) - ret = -EIO; - - bio_put(bio); - return ret; -} -EXPORT_SYMBOL(blkdev_issue_flush); diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index a680964..31a0d4d 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -37,6 +37,12 @@ static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *, static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *); static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *); +/* for encoding cft->private value on file */ +#define BLKIOFILE_PRIVATE(x, val) (((x) << 16) | (val)) +/* What policy owns the file, proportional or throttle */ +#define BLKIOFILE_POLICY(val) (((val) >> 16) & 0xffff) +#define BLKIOFILE_ATTR(val) ((val) & 0xffff) + struct cgroup_subsys blkio_subsys = { .name = "blkio", .create = blkiocg_create, @@ -59,6 +65,27 @@ static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg, list_add(&pn->node, &blkcg->policy_list); } +static inline bool cftype_blkg_same_policy(struct cftype *cft, + struct blkio_group *blkg) +{ + enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private); + + if (blkg->plid == plid) + return 1; + + return 0; +} + +/* Determines if policy node matches cgroup file being accessed */ +static inline bool pn_matches_cftype(struct cftype *cft, + struct blkio_policy_node *pn) +{ + enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private); + int fileid = BLKIOFILE_ATTR(cft->private); + + return (plid == pn->plid && fileid == pn->fileid); +} + /* Must be called with blkcg->lock held */ static inline void blkio_policy_delete_node(struct blkio_policy_node *pn) { @@ -67,12 +94,13 @@ static inline void blkio_policy_delete_node(struct blkio_policy_node *pn) /* Must be called with blkcg->lock held */ static struct blkio_policy_node * -blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev) +blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev, + enum blkio_policy_id plid, int fileid) { struct blkio_policy_node *pn; list_for_each_entry(pn, &blkcg->policy_list, node) { - if (pn->dev == dev) + if (pn->dev == dev && pn->plid == plid && pn->fileid == fileid) return pn; } @@ -86,6 +114,62 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup) } EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup); +static inline void +blkio_update_group_weight(struct blkio_group *blkg, unsigned int weight) +{ + struct blkio_policy_type *blkiop; + + list_for_each_entry(blkiop, &blkio_list, list) { + /* If this policy does not own the blkg, do not send updates */ + if (blkiop->plid != blkg->plid) + continue; + if (blkiop->ops.blkio_update_group_weight_fn) + blkiop->ops.blkio_update_group_weight_fn(blkg, weight); + } +} + +static inline void blkio_update_group_bps(struct blkio_group *blkg, u64 bps, + int fileid) +{ + struct blkio_policy_type *blkiop; + + list_for_each_entry(blkiop, &blkio_list, list) { + + /* If this policy does not own the blkg, do not send updates */ + if (blkiop->plid != blkg->plid) + continue; + + if (fileid == BLKIO_THROTL_read_bps_device + && blkiop->ops.blkio_update_group_read_bps_fn) + blkiop->ops.blkio_update_group_read_bps_fn(blkg, bps); + + if (fileid == BLKIO_THROTL_write_bps_device + && blkiop->ops.blkio_update_group_write_bps_fn) + blkiop->ops.blkio_update_group_write_bps_fn(blkg, bps); + } +} + +static inline void blkio_update_group_iops(struct blkio_group *blkg, + unsigned int iops, 
int fileid) +{ + struct blkio_policy_type *blkiop; + + list_for_each_entry(blkiop, &blkio_list, list) { + + /* If this policy does not own the blkg, do not send updates */ + if (blkiop->plid != blkg->plid) + continue; + + if (fileid == BLKIO_THROTL_read_iops_device + && blkiop->ops.blkio_update_group_read_iops_fn) + blkiop->ops.blkio_update_group_read_iops_fn(blkg, iops); + + if (fileid == BLKIO_THROTL_write_iops_device + && blkiop->ops.blkio_update_group_write_iops_fn) + blkiop->ops.blkio_update_group_write_iops_fn(blkg,iops); + } +} + /* * Add to the appropriate stat variable depending on the request type. * This should be called with the blkg->stats_lock held. @@ -341,7 +425,8 @@ void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction, EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats); void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, - struct blkio_group *blkg, void *key, dev_t dev) + struct blkio_group *blkg, void *key, dev_t dev, + enum blkio_policy_id plid) { unsigned long flags; @@ -350,6 +435,7 @@ void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, rcu_assign_pointer(blkg->key, key); blkg->blkcg_id = css_id(&blkcg->css); hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); + blkg->plid = plid; spin_unlock_irqrestore(&blkcg->lock, flags); /* Need to take css reference ? */ cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path)); @@ -408,51 +494,6 @@ struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) } EXPORT_SYMBOL_GPL(blkiocg_lookup_group); -#define SHOW_FUNCTION(__VAR) \ -static u64 blkiocg_##__VAR##_read(struct cgroup *cgroup, \ - struct cftype *cftype) \ -{ \ - struct blkio_cgroup *blkcg; \ - \ - blkcg = cgroup_to_blkio_cgroup(cgroup); \ - return (u64)blkcg->__VAR; \ -} - -SHOW_FUNCTION(weight); -#undef SHOW_FUNCTION - -static int -blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val) -{ - struct blkio_cgroup *blkcg; - struct blkio_group *blkg; - struct hlist_node *n; - struct blkio_policy_type *blkiop; - struct blkio_policy_node *pn; - - if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX) - return -EINVAL; - - blkcg = cgroup_to_blkio_cgroup(cgroup); - spin_lock(&blkio_list_lock); - spin_lock_irq(&blkcg->lock); - blkcg->weight = (unsigned int)val; - - hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { - pn = blkio_policy_search_node(blkcg, blkg->dev); - - if (pn) - continue; - - list_for_each_entry(blkiop, &blkio_list, list) - blkiop->ops.blkio_update_group_weight_fn(blkg, - blkcg->weight); - } - spin_unlock_irq(&blkcg->lock); - spin_unlock(&blkio_list_lock); - return 0; -} - static int blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val) { @@ -593,52 +634,6 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg, return disk_total; } -#define SHOW_FUNCTION_PER_GROUP(__VAR, type, show_total) \ -static int blkiocg_##__VAR##_read(struct cgroup *cgroup, \ - struct cftype *cftype, struct cgroup_map_cb *cb) \ -{ \ - struct blkio_cgroup *blkcg; \ - struct blkio_group *blkg; \ - struct hlist_node *n; \ - uint64_t cgroup_total = 0; \ - \ - if (!cgroup_lock_live_group(cgroup)) \ - return -ENODEV; \ - \ - blkcg = cgroup_to_blkio_cgroup(cgroup); \ - rcu_read_lock(); \ - hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {\ - if (blkg->dev) { \ - spin_lock_irq(&blkg->stats_lock); \ - cgroup_total += blkio_get_stat(blkg, cb, \ - blkg->dev, type); \ - spin_unlock_irq(&blkg->stats_lock); \ - } \ - } \ - if (show_total) \ - cb->fill(cb, "Total", 
cgroup_total); \ - rcu_read_unlock(); \ - cgroup_unlock(); \ - return 0; \ -} - -SHOW_FUNCTION_PER_GROUP(time, BLKIO_STAT_TIME, 0); -SHOW_FUNCTION_PER_GROUP(sectors, BLKIO_STAT_SECTORS, 0); -SHOW_FUNCTION_PER_GROUP(io_service_bytes, BLKIO_STAT_SERVICE_BYTES, 1); -SHOW_FUNCTION_PER_GROUP(io_serviced, BLKIO_STAT_SERVICED, 1); -SHOW_FUNCTION_PER_GROUP(io_service_time, BLKIO_STAT_SERVICE_TIME, 1); -SHOW_FUNCTION_PER_GROUP(io_wait_time, BLKIO_STAT_WAIT_TIME, 1); -SHOW_FUNCTION_PER_GROUP(io_merged, BLKIO_STAT_MERGED, 1); -SHOW_FUNCTION_PER_GROUP(io_queued, BLKIO_STAT_QUEUED, 1); -#ifdef CONFIG_DEBUG_BLK_CGROUP -SHOW_FUNCTION_PER_GROUP(dequeue, BLKIO_STAT_DEQUEUE, 0); -SHOW_FUNCTION_PER_GROUP(avg_queue_size, BLKIO_STAT_AVG_QUEUE_SIZE, 0); -SHOW_FUNCTION_PER_GROUP(group_wait_time, BLKIO_STAT_GROUP_WAIT_TIME, 0); -SHOW_FUNCTION_PER_GROUP(idle_time, BLKIO_STAT_IDLE_TIME, 0); -SHOW_FUNCTION_PER_GROUP(empty_time, BLKIO_STAT_EMPTY_TIME, 0); -#endif -#undef SHOW_FUNCTION_PER_GROUP - static int blkio_check_dev_num(dev_t dev) { int part = 0; @@ -652,13 +647,14 @@ static int blkio_check_dev_num(dev_t dev) } static int blkio_policy_parse_and_set(char *buf, - struct blkio_policy_node *newpn) + struct blkio_policy_node *newpn, enum blkio_policy_id plid, int fileid) { char *s[4], *p, *major_s = NULL, *minor_s = NULL; int ret; - unsigned long major, minor, temp; + unsigned long major, minor, temp, iops; int i = 0; dev_t dev; + u64 bps; memset(s, 0, sizeof(s)); @@ -705,12 +701,44 @@ static int blkio_policy_parse_and_set(char *buf, if (s[1] == NULL) return -EINVAL; - ret = strict_strtoul(s[1], 10, &temp); - if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) || - temp > BLKIO_WEIGHT_MAX) - return -EINVAL; + switch (plid) { + case BLKIO_POLICY_PROP: + ret = strict_strtoul(s[1], 10, &temp); + if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) || + temp > BLKIO_WEIGHT_MAX) + return -EINVAL; - newpn->weight = temp; + newpn->plid = plid; + newpn->fileid = fileid; + newpn->val.weight = temp; + break; + case BLKIO_POLICY_THROTL: + switch(fileid) { + case BLKIO_THROTL_read_bps_device: + case BLKIO_THROTL_write_bps_device: + ret = strict_strtoull(s[1], 10, &bps); + if (ret) + return -EINVAL; + + newpn->plid = plid; + newpn->fileid = fileid; + newpn->val.bps = bps; + break; + case BLKIO_THROTL_read_iops_device: + case BLKIO_THROTL_write_iops_device: + ret = strict_strtoul(s[1], 10, &iops); + if (ret) + return -EINVAL; + + newpn->plid = plid; + newpn->fileid = fileid; + newpn->val.iops = iops; + break; + } + break; + default: + BUG(); + } return 0; } @@ -720,26 +748,180 @@ unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg, { struct blkio_policy_node *pn; - pn = blkio_policy_search_node(blkcg, dev); + pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_PROP, + BLKIO_PROP_weight_device); if (pn) - return pn->weight; + return pn->val.weight; else return blkcg->weight; } EXPORT_SYMBOL_GPL(blkcg_get_weight); +uint64_t blkcg_get_read_bps(struct blkio_cgroup *blkcg, dev_t dev) +{ + struct blkio_policy_node *pn; + + pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL, + BLKIO_THROTL_read_bps_device); + if (pn) + return pn->val.bps; + else + return -1; +} + +uint64_t blkcg_get_write_bps(struct blkio_cgroup *blkcg, dev_t dev) +{ + struct blkio_policy_node *pn; + pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL, + BLKIO_THROTL_write_bps_device); + if (pn) + return pn->val.bps; + else + return -1; +} + +unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg, dev_t dev) +{ + struct blkio_policy_node *pn; 
+ + pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL, + BLKIO_THROTL_read_iops_device); + if (pn) + return pn->val.iops; + else + return -1; +} + +unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, dev_t dev) +{ + struct blkio_policy_node *pn; + pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL, + BLKIO_THROTL_write_iops_device); + if (pn) + return pn->val.iops; + else + return -1; +} -static int blkiocg_weight_device_write(struct cgroup *cgrp, struct cftype *cft, - const char *buffer) +/* Checks whether the user asked to delete a policy rule */ +static bool blkio_delete_rule_command(struct blkio_policy_node *pn) +{ + switch(pn->plid) { + case BLKIO_POLICY_PROP: + if (pn->val.weight == 0) + return 1; + break; + case BLKIO_POLICY_THROTL: + switch(pn->fileid) { + case BLKIO_THROTL_read_bps_device: + case BLKIO_THROTL_write_bps_device: + if (pn->val.bps == 0) + return 1; + break; + case BLKIO_THROTL_read_iops_device: + case BLKIO_THROTL_write_iops_device: + if (pn->val.iops == 0) + return 1; + } + break; + default: + BUG(); + } + + return 0; +} + +static void blkio_update_policy_rule(struct blkio_policy_node *oldpn, + struct blkio_policy_node *newpn) +{ + switch(oldpn->plid) { + case BLKIO_POLICY_PROP: + oldpn->val.weight = newpn->val.weight; + break; + case BLKIO_POLICY_THROTL: + switch(newpn->fileid) { + case BLKIO_THROTL_read_bps_device: + case BLKIO_THROTL_write_bps_device: + oldpn->val.bps = newpn->val.bps; + break; + case BLKIO_THROTL_read_iops_device: + case BLKIO_THROTL_write_iops_device: + oldpn->val.iops = newpn->val.iops; + } + break; + default: + BUG(); + } +} + +/* + * Some rules/values in blkg have changed. Propagate those to respective + * policies. + */ +static void blkio_update_blkg_policy(struct blkio_cgroup *blkcg, + struct blkio_group *blkg, struct blkio_policy_node *pn) +{ + unsigned int weight, iops; + u64 bps; + + switch(pn->plid) { + case BLKIO_POLICY_PROP: + weight = pn->val.weight ? pn->val.weight : + blkcg->weight; + blkio_update_group_weight(blkg, weight); + break; + case BLKIO_POLICY_THROTL: + switch(pn->fileid) { + case BLKIO_THROTL_read_bps_device: + case BLKIO_THROTL_write_bps_device: + bps = pn->val.bps ? pn->val.bps : (-1); + blkio_update_group_bps(blkg, bps, pn->fileid); + break; + case BLKIO_THROTL_read_iops_device: + case BLKIO_THROTL_write_iops_device: + iops = pn->val.iops ? pn->val.iops : (-1); + blkio_update_group_iops(blkg, iops, pn->fileid); + break; + } + break; + default: + BUG(); + } +} + +/* + * A policy node rule has been updated. Propagate this update to all the + * block groups which might be affected by this update.
+ */ +static void blkio_update_policy_node_blkg(struct blkio_cgroup *blkcg, + struct blkio_policy_node *pn) +{ + struct blkio_group *blkg; + struct hlist_node *n; + + spin_lock(&blkio_list_lock); + spin_lock_irq(&blkcg->lock); + + hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { + if (pn->dev != blkg->dev || pn->plid != blkg->plid) + continue; + blkio_update_blkg_policy(blkcg, blkg, pn); + } + + spin_unlock_irq(&blkcg->lock); + spin_unlock(&blkio_list_lock); +} + +static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft, + const char *buffer) { int ret = 0; char *buf; struct blkio_policy_node *newpn, *pn; struct blkio_cgroup *blkcg; - struct blkio_group *blkg; int keep_newpn = 0; - struct hlist_node *n; - struct blkio_policy_type *blkiop; + enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private); + int fileid = BLKIOFILE_ATTR(cft->private); buf = kstrdup(buffer, GFP_KERNEL); if (!buf) @@ -751,7 +933,7 @@ static int blkiocg_weight_device_write(struct cgroup *cgrp, struct cftype *cft, goto free_buf; } - ret = blkio_policy_parse_and_set(buf, newpn); + ret = blkio_policy_parse_and_set(buf, newpn, plid, fileid); if (ret) goto free_newpn; @@ -759,9 +941,9 @@ static int blkiocg_weight_device_write(struct cgroup *cgrp, struct cftype *cft, spin_lock_irq(&blkcg->lock); - pn = blkio_policy_search_node(blkcg, newpn->dev); + pn = blkio_policy_search_node(blkcg, newpn->dev, plid, fileid); if (!pn) { - if (newpn->weight != 0) { + if (!blkio_delete_rule_command(newpn)) { blkio_policy_insert_node(blkcg, newpn); keep_newpn = 1; } @@ -769,33 +951,17 @@ static int blkiocg_weight_device_write(struct cgroup *cgrp, struct cftype *cft, goto update_io_group; } - if (newpn->weight == 0) { - /* weight == 0 means deleteing a specific weight */ + if (blkio_delete_rule_command(newpn)) { blkio_policy_delete_node(pn); spin_unlock_irq(&blkcg->lock); goto update_io_group; } spin_unlock_irq(&blkcg->lock); - pn->weight = newpn->weight; + blkio_update_policy_rule(pn, newpn); update_io_group: - /* update weight for each cfqg */ - spin_lock(&blkio_list_lock); - spin_lock_irq(&blkcg->lock); - - hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { - if (newpn->dev == blkg->dev) { - list_for_each_entry(blkiop, &blkio_list, list) - blkiop->ops.blkio_update_group_weight_fn(blkg, - newpn->weight ? 
- newpn->weight : - blkcg->weight); - } - } - - spin_unlock_irq(&blkcg->lock); - spin_unlock(&blkio_list_lock); + blkio_update_policy_node_blkg(blkcg, newpn); free_newpn: if (!keep_newpn) @@ -805,23 +971,256 @@ free_buf: return ret; } -static int blkiocg_weight_device_read(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *m) +static void +blkio_print_policy_node(struct seq_file *m, struct blkio_policy_node *pn) { - struct blkio_cgroup *blkcg; - struct blkio_policy_node *pn; + switch(pn->plid) { + case BLKIO_POLICY_PROP: + if (pn->fileid == BLKIO_PROP_weight_device) + seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev), + MINOR(pn->dev), pn->val.weight); + break; + case BLKIO_POLICY_THROTL: + switch(pn->fileid) { + case BLKIO_THROTL_read_bps_device: + case BLKIO_THROTL_write_bps_device: + seq_printf(m, "%u:%u\t%llu\n", MAJOR(pn->dev), + MINOR(pn->dev), pn->val.bps); + break; + case BLKIO_THROTL_read_iops_device: + case BLKIO_THROTL_write_iops_device: + seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev), + MINOR(pn->dev), pn->val.iops); + break; + } + break; + default: + BUG(); + } +} - seq_printf(m, "dev\tweight\n"); +/* cgroup files which read their data from policy nodes end up here */ +static void blkio_read_policy_node_files(struct cftype *cft, + struct blkio_cgroup *blkcg, struct seq_file *m) +{ + struct blkio_policy_node *pn; - blkcg = cgroup_to_blkio_cgroup(cgrp); if (!list_empty(&blkcg->policy_list)) { spin_lock_irq(&blkcg->lock); list_for_each_entry(pn, &blkcg->policy_list, node) { - seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev), - MINOR(pn->dev), pn->weight); + if (!pn_matches_cftype(cft, pn)) + continue; + blkio_print_policy_node(m, pn); } spin_unlock_irq(&blkcg->lock); } +} + +static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *m) +{ + struct blkio_cgroup *blkcg; + enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private); + int name = BLKIOFILE_ATTR(cft->private); + + blkcg = cgroup_to_blkio_cgroup(cgrp); + + switch(plid) { + case BLKIO_POLICY_PROP: + switch(name) { + case BLKIO_PROP_weight_device: + blkio_read_policy_node_files(cft, blkcg, m); + return 0; + default: + BUG(); + } + break; + case BLKIO_POLICY_THROTL: + switch(name){ + case BLKIO_THROTL_read_bps_device: + case BLKIO_THROTL_write_bps_device: + case BLKIO_THROTL_read_iops_device: + case BLKIO_THROTL_write_iops_device: + blkio_read_policy_node_files(cft, blkcg, m); + return 0; + default: + BUG(); + } + break; + default: + BUG(); + } + + return 0; +} + +static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg, + struct cftype *cft, struct cgroup_map_cb *cb, enum stat_type type, + bool show_total) +{ + struct blkio_group *blkg; + struct hlist_node *n; + uint64_t cgroup_total = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) { + if (blkg->dev) { + if (!cftype_blkg_same_policy(cft, blkg)) + continue; + spin_lock_irq(&blkg->stats_lock); + cgroup_total += blkio_get_stat(blkg, cb, blkg->dev, + type); + spin_unlock_irq(&blkg->stats_lock); + } + } + if (show_total) + cb->fill(cb, "Total", cgroup_total); + rcu_read_unlock(); + return 0; +} + +/* All map-type cgroup files are serviced by this function */ +static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft, + struct cgroup_map_cb *cb) +{ + struct blkio_cgroup *blkcg; + enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private); + int name = BLKIOFILE_ATTR(cft->private); + + blkcg = cgroup_to_blkio_cgroup(cgrp); + + switch(plid) { + case BLKIO_POLICY_PROP: + switch(name) { + case
BLKIO_PROP_time: + return blkio_read_blkg_stats(blkcg, cft, cb, + BLKIO_STAT_TIME, 0); + case BLKIO_PROP_sectors: + return blkio_read_blkg_stats(blkcg, cft, cb, + BLKIO_STAT_SECTORS, 0); + case BLKIO_PROP_io_service_bytes: + return blkio_read_blkg_stats(blkcg, cft, cb, + BLKIO_STAT_SERVICE_BYTES, 1); + case BLKIO_PROP_io_serviced: + return blkio_read_blkg_stats(blkcg, cft, cb, + BLKIO_STAT_SERVICED, 1); + case BLKIO_PROP_io_service_time: + return blkio_read_blkg_stats(blkcg, cft, cb, + BLKIO_STAT_SERVICE_TIME, 1); + case BLKIO_PROP_io_wait_time: + return blkio_read_blkg_stats(blkcg, cft, cb, + BLKIO_STAT_WAIT_TIME, 1); + case BLKIO_PROP_io_merged: + return blkio_read_blkg_stats(blkcg, cft, cb, + BLKIO_STAT_MERGED, 1); + case BLKIO_PROP_io_queued: + return blkio_read_blkg_stats(blkcg, cft, cb, + BLKIO_STAT_QUEUED, 1); +#ifdef CONFIG_DEBUG_BLK_CGROUP + case BLKIO_PROP_dequeue: + return blkio_read_blkg_stats(blkcg, cft, cb, + BLKIO_STAT_DEQUEUE, 0); + case BLKIO_PROP_avg_queue_size: + return blkio_read_blkg_stats(blkcg, cft, cb, + BLKIO_STAT_AVG_QUEUE_SIZE, 0); + case BLKIO_PROP_group_wait_time: + return blkio_read_blkg_stats(blkcg, cft, cb, + BLKIO_STAT_GROUP_WAIT_TIME, 0); + case BLKIO_PROP_idle_time: + return blkio_read_blkg_stats(blkcg, cft, cb, + BLKIO_STAT_IDLE_TIME, 0); + case BLKIO_PROP_empty_time: + return blkio_read_blkg_stats(blkcg, cft, cb, + BLKIO_STAT_EMPTY_TIME, 0); +#endif + default: + BUG(); + } + break; + case BLKIO_POLICY_THROTL: + switch(name){ + case BLKIO_THROTL_io_service_bytes: + return blkio_read_blkg_stats(blkcg, cft, cb, + BLKIO_STAT_SERVICE_BYTES, 1); + case BLKIO_THROTL_io_serviced: + return blkio_read_blkg_stats(blkcg, cft, cb, + BLKIO_STAT_SERVICED, 1); + default: + BUG(); + } + break; + default: + BUG(); + } + + return 0; +} + +static int blkio_weight_write(struct blkio_cgroup *blkcg, u64 val) +{ + struct blkio_group *blkg; + struct hlist_node *n; + struct blkio_policy_node *pn; + + if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX) + return -EINVAL; + + spin_lock(&blkio_list_lock); + spin_lock_irq(&blkcg->lock); + blkcg->weight = (unsigned int)val; + + hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { + pn = blkio_policy_search_node(blkcg, blkg->dev, + BLKIO_POLICY_PROP, BLKIO_PROP_weight_device); + if (pn) + continue; + + blkio_update_group_weight(blkg, blkcg->weight); + } + spin_unlock_irq(&blkcg->lock); + spin_unlock(&blkio_list_lock); + return 0; +} + +static u64 blkiocg_file_read_u64 (struct cgroup *cgrp, struct cftype *cft) { + struct blkio_cgroup *blkcg; + enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private); + int name = BLKIOFILE_ATTR(cft->private); + + blkcg = cgroup_to_blkio_cgroup(cgrp); + + switch(plid) { + case BLKIO_POLICY_PROP: + switch(name) { + case BLKIO_PROP_weight: + return (u64)blkcg->weight; + } + break; + default: + BUG(); + } + return 0; +} + +static int +blkiocg_file_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val) +{ + struct blkio_cgroup *blkcg; + enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private); + int name = BLKIOFILE_ATTR(cft->private); + + blkcg = cgroup_to_blkio_cgroup(cgrp); + + switch(plid) { + case BLKIO_POLICY_PROP: + switch(name) { + case BLKIO_PROP_weight: + return blkio_weight_write(blkcg, val); + } + break; + default: + BUG(); + } return 0; } @@ -829,46 +1228,113 @@ static int blkiocg_weight_device_read(struct cgroup *cgrp, struct cftype *cft, struct cftype blkio_files[] = { { .name = "weight_device", - .read_seq_string = blkiocg_weight_device_read, - .write_string = 
blkiocg_weight_device_write, + .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, + BLKIO_PROP_weight_device), + .read_seq_string = blkiocg_file_read, + .write_string = blkiocg_file_write, .max_write_len = 256, }, { .name = "weight", - .read_u64 = blkiocg_weight_read, - .write_u64 = blkiocg_weight_write, + .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, + BLKIO_PROP_weight), + .read_u64 = blkiocg_file_read_u64, + .write_u64 = blkiocg_file_write_u64, + }, + { + .name = "throttle.read_bps_device", + .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL, + BLKIO_THROTL_read_bps_device), + .read_seq_string = blkiocg_file_read, + .write_string = blkiocg_file_write, + .max_write_len = 256, + }, + + { + .name = "throttle.write_bps_device", + .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL, + BLKIO_THROTL_write_bps_device), + .read_seq_string = blkiocg_file_read, + .write_string = blkiocg_file_write, + .max_write_len = 256, + }, + + { + .name = "throttle.read_iops_device", + .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL, + BLKIO_THROTL_read_iops_device), + .read_seq_string = blkiocg_file_read, + .write_string = blkiocg_file_write, + .max_write_len = 256, + }, + + { + .name = "throttle.write_iops_device", + .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL, + BLKIO_THROTL_write_iops_device), + .read_seq_string = blkiocg_file_read, + .write_string = blkiocg_file_write, + .max_write_len = 256, }, { .name = "time", - .read_map = blkiocg_time_read, + .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, + BLKIO_PROP_time), + .read_map = blkiocg_file_read_map, }, { .name = "sectors", - .read_map = blkiocg_sectors_read, + .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, + BLKIO_PROP_sectors), + .read_map = blkiocg_file_read_map, }, { .name = "io_service_bytes", - .read_map = blkiocg_io_service_bytes_read, + .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, + BLKIO_PROP_io_service_bytes), + .read_map = blkiocg_file_read_map, + }, + { + .name = "throttle.io_service_bytes", + .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL, + BLKIO_THROTL_io_service_bytes), + .read_map = blkiocg_file_read_map, }, { .name = "io_serviced", - .read_map = blkiocg_io_serviced_read, + .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, + BLKIO_PROP_io_serviced), + .read_map = blkiocg_file_read_map, + }, + { + .name = "throttle.io_serviced", + .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL, + BLKIO_THROTL_io_serviced), + .read_map = blkiocg_file_read_map, }, { .name = "io_service_time", - .read_map = blkiocg_io_service_time_read, + .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, + BLKIO_PROP_io_service_time), + .read_map = blkiocg_file_read_map, }, { .name = "io_wait_time", - .read_map = blkiocg_io_wait_time_read, + .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, + BLKIO_PROP_io_wait_time), + .read_map = blkiocg_file_read_map, }, { .name = "io_merged", - .read_map = blkiocg_io_merged_read, + .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, + BLKIO_PROP_io_merged), + .read_map = blkiocg_file_read_map, }, { .name = "io_queued", - .read_map = blkiocg_io_queued_read, + .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, + BLKIO_PROP_io_queued), + .read_map = blkiocg_file_read_map, }, { .name = "reset_stats", @@ -877,23 +1343,33 @@ struct cftype blkio_files[] = { #ifdef CONFIG_DEBUG_BLK_CGROUP { .name = "avg_queue_size", - .read_map = blkiocg_avg_queue_size_read, + .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, + BLKIO_PROP_avg_queue_size), + .read_map = blkiocg_file_read_map, }, { .name = "group_wait_time", - .read_map = blkiocg_group_wait_time_read, + 
.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, + BLKIO_PROP_group_wait_time), + .read_map = blkiocg_file_read_map, }, { .name = "idle_time", - .read_map = blkiocg_idle_time_read, + .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, + BLKIO_PROP_idle_time), + .read_map = blkiocg_file_read_map, }, { .name = "empty_time", - .read_map = blkiocg_empty_time_read, + .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, + BLKIO_PROP_empty_time), + .read_map = blkiocg_file_read_map, }, { .name = "dequeue", - .read_map = blkiocg_dequeue_read, + .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, + BLKIO_PROP_dequeue), + .read_map = blkiocg_file_read_map, }, #endif }; @@ -966,7 +1442,7 @@ blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup) /* Currently we do not support hierarchy deeper than two level (0,1) */ if (parent != cgroup->top_cgroup) - return ERR_PTR(-EINVAL); + return ERR_PTR(-EPERM); blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL); if (!blkcg) diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 2b866ec..2070053 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -15,6 +15,11 @@ #include +enum blkio_policy_id { + BLKIO_POLICY_PROP = 0, /* Proportional Bandwidth division */ + BLKIO_POLICY_THROTL, /* Throttling */ +}; + #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE) #ifndef CONFIG_BLK_CGROUP @@ -65,6 +70,35 @@ enum blkg_state_flags { BLKG_empty, }; +/* cgroup files owned by proportional weight policy */ +enum blkcg_file_name_prop { + BLKIO_PROP_weight = 1, + BLKIO_PROP_weight_device, + BLKIO_PROP_io_service_bytes, + BLKIO_PROP_io_serviced, + BLKIO_PROP_time, + BLKIO_PROP_sectors, + BLKIO_PROP_io_service_time, + BLKIO_PROP_io_wait_time, + BLKIO_PROP_io_merged, + BLKIO_PROP_io_queued, + BLKIO_PROP_avg_queue_size, + BLKIO_PROP_group_wait_time, + BLKIO_PROP_idle_time, + BLKIO_PROP_empty_time, + BLKIO_PROP_dequeue, +}; + +/* cgroup files owned by throttle policy */ +enum blkcg_file_name_throtl { + BLKIO_THROTL_read_bps_device, + BLKIO_THROTL_write_bps_device, + BLKIO_THROTL_read_iops_device, + BLKIO_THROTL_write_iops_device, + BLKIO_THROTL_io_service_bytes, + BLKIO_THROTL_io_serviced, +}; + struct blkio_cgroup { struct cgroup_subsys_state css; unsigned int weight; @@ -112,6 +146,8 @@ struct blkio_group { char path[128]; /* The device MKDEV(major, minor), this group has been created for */ dev_t dev; + /* policy which owns this blk group */ + enum blkio_policy_id plid; /* Need to serialize the stats in the case of reset/update */ spinlock_t stats_lock; @@ -121,24 +157,59 @@ struct blkio_group { struct blkio_policy_node { struct list_head node; dev_t dev; - unsigned int weight; + /* This node belongs to max bw policy or proportional weight policy */ + enum blkio_policy_id plid; + /* cgroup file to which this rule belongs */ + int fileid; + + union { + unsigned int weight; + /* + * Rate read/write in terms of bytes per second + * Whether this rate represents read or write is determined + * by file type "fileid".
+ */ + u64 bps; + unsigned int iops; + } val; }; extern unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg, dev_t dev); +extern uint64_t blkcg_get_read_bps(struct blkio_cgroup *blkcg, + dev_t dev); +extern uint64_t blkcg_get_write_bps(struct blkio_cgroup *blkcg, + dev_t dev); +extern unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg, + dev_t dev); +extern unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, + dev_t dev); typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg); typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg, unsigned int weight); +typedef void (blkio_update_group_read_bps_fn) (struct blkio_group *blkg, + u64 read_bps); +typedef void (blkio_update_group_write_bps_fn) (struct blkio_group *blkg, + u64 write_bps); +typedef void (blkio_update_group_read_iops_fn) (struct blkio_group *blkg, + unsigned int read_iops); +typedef void (blkio_update_group_write_iops_fn) (struct blkio_group *blkg, + unsigned int write_iops); struct blkio_policy_ops { blkio_unlink_group_fn *blkio_unlink_group_fn; blkio_update_group_weight_fn *blkio_update_group_weight_fn; + blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn; + blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn; + blkio_update_group_read_iops_fn *blkio_update_group_read_iops_fn; + blkio_update_group_write_iops_fn *blkio_update_group_write_iops_fn; }; struct blkio_policy_type { struct list_head list; struct blkio_policy_ops ops; + enum blkio_policy_id plid; }; /* Blkio controller policy registration */ @@ -212,7 +283,8 @@ static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg) {} extern struct blkio_cgroup blkio_root_cgroup; extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup); extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, - struct blkio_group *blkg, void *key, dev_t dev); + struct blkio_group *blkg, void *key, dev_t dev, + enum blkio_policy_id plid); extern int blkiocg_del_blkio_group(struct blkio_group *blkg); extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key); @@ -234,7 +306,8 @@ static inline struct blkio_cgroup * cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; } static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, - struct blkio_group *blkg, void *key, dev_t dev) {} + struct blkio_group *blkg, void *key, dev_t dev, + enum blkio_policy_id plid) {} static inline int blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; } diff --git a/block/blk-core.c b/block/blk-core.c index ee1a1e7..2e7cef8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -136,7 +136,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio, { struct request_queue *q = rq->q; - if (&q->bar_rq != rq) { + if (&q->flush_rq != rq) { if (error) clear_bit(BIO_UPTODATE, &bio->bi_flags); else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) @@ -160,13 +160,12 @@ static void req_bio_endio(struct request *rq, struct bio *bio, if (bio->bi_size == 0) bio_endio(bio, error); } else { - /* - * Okay, this is the barrier request in progress, just - * record the error; + * Okay, this is the sequenced flush request in + * progress, just record the error; */ - if (error && !q->orderr) - q->orderr = error; + if (error && !q->flush_err) + q->flush_err = error; } } @@ -382,6 +381,7 @@ void blk_sync_queue(struct request_queue *q) del_timer_sync(&q->unplug_timer); del_timer_sync(&q->timeout); cancel_work_sync(&q->unplug_work); + throtl_shutdown_timer_wq(q); } 
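Before the blk-core.c changes continue below, a note on the blk-cgroup.h hooks just added: a policy tags the groups it owns with a plid and supplies blkio_policy_ops callbacks so blk-cgroup.c can route per-device rule updates to it. The sketch below is illustrative only; the my_* names are hypothetical, the real registration is expected to live in the new blk-throttle.c (not shown in this hunk), and it relies on the pre-existing blkio_policy_register():

	/* Minimal sketch of a throttle-style policy registration (assumptions noted above). */
	#include "blk-cgroup.h"

	static void my_update_read_bps(struct blkio_group *blkg, u64 read_bps)
	{
		/* Apply the new read limit in bytes/sec; -1 is used above to mean "no limit". */
	}

	static struct blkio_policy_type my_throtl_policy = {
		.ops = {
			.blkio_update_group_read_bps_fn = my_update_read_bps,
		},
		.plid = BLKIO_POLICY_THROTL,
	};

	static int __init my_policy_init(void)
	{
		/* called from the policy's own init path */
		blkio_policy_register(&my_throtl_policy);
		return 0;
	}
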
EXPORT_SYMBOL(blk_sync_queue); @@ -459,6 +459,8 @@ void blk_cleanup_queue(struct request_queue *q) if (q->elevator) elevator_exit(q->elevator); + blk_throtl_exit(q); + blk_put_queue(q); } EXPORT_SYMBOL(blk_cleanup_queue); @@ -515,11 +517,17 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) return NULL; } + if (blk_throtl_init(q)) { + kmem_cache_free(blk_requestq_cachep, q); + return NULL; + } + setup_timer(&q->backing_dev_info.laptop_mode_wb_timer, laptop_mode_timer_fn, (unsigned long) q); init_timer(&q->unplug_timer); setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); INIT_LIST_HEAD(&q->timeout_list); + INIT_LIST_HEAD(&q->pending_flushes); INIT_WORK(&q->unplug_work, blk_unplug_work); kobject_init(&q->kobj, &blk_queue_ktype); @@ -1037,22 +1045,6 @@ void blk_insert_request(struct request_queue *q, struct request *rq, } EXPORT_SYMBOL(blk_insert_request); -/* - * add-request adds a request to the linked list. - * queue lock is held and interrupts disabled, as we muck with the - * request queue list. - */ -static inline void add_request(struct request_queue *q, struct request *req) -{ - drive_stat_acct(req, 1); - - /* - * elevator indicated where it wants this request to be - * inserted at elevator_merge time - */ - __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); -} - static void part_round_stats_single(int cpu, struct hd_struct *part, unsigned long now) { @@ -1198,16 +1190,19 @@ static int __make_request(struct request_queue *q, struct bio *bio) int el_ret; unsigned int bytes = bio->bi_size; const unsigned short prio = bio_prio(bio); - const bool sync = (bio->bi_rw & REQ_SYNC); - const bool unplug = (bio->bi_rw & REQ_UNPLUG); - const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK; + const bool sync = !!(bio->bi_rw & REQ_SYNC); + const bool unplug = !!(bio->bi_rw & REQ_UNPLUG); + const unsigned long ff = bio->bi_rw & REQ_FAILFAST_MASK; + int where = ELEVATOR_INSERT_SORT; int rw_flags; - if ((bio->bi_rw & REQ_HARDBARRIER) && - (q->next_ordered == QUEUE_ORDERED_NONE)) { + /* REQ_HARDBARRIER is no more */ + if (WARN_ONCE(bio->bi_rw & REQ_HARDBARRIER, + "block: HARDBARRIER is deprecated, use FLUSH/FUA instead\n")) { bio_endio(bio, -EOPNOTSUPP); return 0; } + /* * low level driver can indicate that it wants pages above a * certain limit bounced to low memory (ie for highmem, or even @@ -1217,7 +1212,12 @@ static int __make_request(struct request_queue *q, struct bio *bio) spin_lock_irq(q->queue_lock); - if (unlikely((bio->bi_rw & REQ_HARDBARRIER)) || elv_queue_empty(q)) + if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { + where = ELEVATOR_INSERT_FRONT; + goto get_rq; + } + + if (elv_queue_empty(q)) goto get_rq; el_ret = elv_merge(q, &req, bio); @@ -1314,7 +1314,10 @@ get_rq: req->cpu = blk_cpu_to_group(smp_processor_id()); if (queue_should_plug(q) && elv_queue_empty(q)) blk_plug_device(q); - add_request(q, req); + + /* insert the request into the elevator */ + drive_stat_acct(req, 1); + __elv_add_request(q, req, where, 0); out: if (unplug || !queue_should_plug(q)) __generic_unplug_device(q); @@ -1514,6 +1517,19 @@ static inline void __generic_make_request(struct bio *bio) if (bio_check_eod(bio, nr_sectors)) goto end_io; + /* + * Filter flush bio's early so that make_request based + * drivers without flush support don't have to worry + * about them. 
+ */ + if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) { + bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA); + if (!nr_sectors) { + err = 0; + goto end_io; + } + } + if ((bio->bi_rw & REQ_DISCARD) && (!blk_queue_discard(q) || ((bio->bi_rw & REQ_SECURE) && @@ -1522,6 +1538,15 @@ static inline void __generic_make_request(struct bio *bio) goto end_io; } + blk_throtl_bio(q, &bio); + + /* + * If bio = NULL, bio has been throttled and will be submitted + * later. + */ + if (!bio) + break; + trace_block_bio_queue(q, bio); ret = q->make_request_fn(q, bio); @@ -1612,11 +1637,12 @@ void submit_bio(int rw, struct bio *bio) if (unlikely(block_dump)) { char b[BDEVNAME_SIZE]; - printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n", + printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n", current->comm, task_pid_nr(current), (rw & WRITE) ? "WRITE" : "READ", (unsigned long long)bio->bi_sector, - bdevname(bio->bi_bdev, b)); + bdevname(bio->bi_bdev, b), + count); } } @@ -1768,11 +1794,11 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes) static void blk_account_io_done(struct request *req) { /* - * Account IO completion. bar_rq isn't accounted as a normal - * IO on queueing nor completion. Accounting the containing - * request is enough. + * Account IO completion. flush_rq isn't accounted as a + * normal IO on queueing nor completion. Accounting the + * containing request is enough. */ - if (blk_do_io_stat(req) && req != &req->q->bar_rq) { + if (blk_do_io_stat(req) && req != &req->q->flush_rq) { unsigned long duration = jiffies - req->start_time; const int rw = rq_data_dir(req); struct hd_struct *part; @@ -2497,9 +2523,7 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); static void __blk_rq_prep_clone(struct request *dst, struct request *src) { dst->cpu = src->cpu; - dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE); - if (src->cmd_flags & REQ_DISCARD) - dst->cmd_flags |= REQ_DISCARD; + dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; dst->cmd_type = src->cmd_type; dst->__sector = blk_rq_pos(src); dst->__data_len = blk_rq_bytes(src); @@ -2579,6 +2603,13 @@ int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) } EXPORT_SYMBOL(kblockd_schedule_work); +int kblockd_schedule_delayed_work(struct request_queue *q, + struct delayed_work *dwork, unsigned long delay) +{ + return queue_delayed_work(kblockd_workqueue, dwork, delay); +} +EXPORT_SYMBOL(kblockd_schedule_delayed_work); + int __init blk_dev_init(void) { BUILD_BUG_ON(__REQ_NR_BITS > 8 * diff --git a/block/blk-exec.c b/block/blk-exec.c index e1672f1..cf1456a 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -80,6 +80,7 @@ int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk, DECLARE_COMPLETION_ONSTACK(wait); char sense[SCSI_SENSE_BUFFERSIZE]; int err = 0; + unsigned long hang_check; /* * we need an extra reference to the request, so we can look at @@ -95,7 +96,13 @@ int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk, rq->end_io_data = &wait; blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); - wait_for_completion(&wait); + + /* Prevent hang_check timer from firing at us during very long I/O */ + hang_check = sysctl_hung_task_timeout_secs; + if (hang_check) + while (!wait_for_completion_timeout(&wait, hang_check * (HZ/2))); + else + wait_for_completion(&wait); if (rq->errors) err = -EIO; diff --git a/block/blk-flush.c b/block/blk-flush.c new file mode 100644 index 0000000..54b123d --- /dev/null +++ b/block/blk-flush.c @@ -0,0 +1,262 @@ +/* + 
* Functions to sequence FLUSH and FUA writes. + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/bio.h> +#include <linux/blkdev.h> +#include <linux/gfp.h> + +#include "blk.h" + +/* FLUSH/FUA sequences */ +enum { + QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ + QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ + QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ + QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ + QUEUE_FSEQ_DONE = (1 << 4), +}; + +static struct request *queue_next_fseq(struct request_queue *q); + +unsigned blk_flush_cur_seq(struct request_queue *q) +{ + if (!q->flush_seq) + return 0; + return 1 << ffz(q->flush_seq); +} + +static struct request *blk_flush_complete_seq(struct request_queue *q, + unsigned seq, int error) +{ + struct request *next_rq = NULL; + + if (error && !q->flush_err) + q->flush_err = error; + + BUG_ON(q->flush_seq & seq); + q->flush_seq |= seq; + + if (blk_flush_cur_seq(q) != QUEUE_FSEQ_DONE) { + /* not complete yet, queue the next flush sequence */ + next_rq = queue_next_fseq(q); + } else { + /* complete this flush request */ + __blk_end_request_all(q->orig_flush_rq, q->flush_err); + q->orig_flush_rq = NULL; + q->flush_seq = 0; + + /* dispatch the next flush if there's one */ + if (!list_empty(&q->pending_flushes)) { + next_rq = list_entry_rq(q->pending_flushes.next); + list_move(&next_rq->queuelist, &q->queue_head); + } + } + return next_rq; +} + +static void blk_flush_complete_seq_end_io(struct request_queue *q, + unsigned seq, int error) +{ + bool was_empty = elv_queue_empty(q); + struct request *next_rq; + + next_rq = blk_flush_complete_seq(q, seq, error); + + /* + * Moving a request silently to empty queue_head may stall the + * queue. Kick the queue in those cases. + */ + if (was_empty && next_rq) + __blk_run_queue(q); +} + +static void pre_flush_end_io(struct request *rq, int error) +{ + elv_completed_request(rq->q, rq); + blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_PREFLUSH, error); +} + +static void flush_data_end_io(struct request *rq, int error) +{ + elv_completed_request(rq->q, rq); + blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_DATA, error); +} + +static void post_flush_end_io(struct request *rq, int error) +{ + elv_completed_request(rq->q, rq); + blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_POSTFLUSH, error); +} + +static void init_flush_request(struct request *rq, struct gendisk *disk) +{ + rq->cmd_type = REQ_TYPE_FS; + rq->cmd_flags = WRITE_FLUSH; + rq->rq_disk = disk; +} + +static struct request *queue_next_fseq(struct request_queue *q) +{ + struct request *orig_rq = q->orig_flush_rq; + struct request *rq = &q->flush_rq; + + blk_rq_init(q, rq); + + switch (blk_flush_cur_seq(q)) { + case QUEUE_FSEQ_PREFLUSH: + init_flush_request(rq, orig_rq->rq_disk); + rq->end_io = pre_flush_end_io; + break; + case QUEUE_FSEQ_DATA: + init_request_from_bio(rq, orig_rq->bio); + /* + * orig_rq->rq_disk may be different from + * bio->bi_bdev->bd_disk if orig_rq got here through + * remapping drivers. Make sure rq->rq_disk points + * to the same one as orig_rq.
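An aside on the sequencing above: because completed and skipped stages are OR'd into q->flush_seq, "1 << first-zero-bit" always names the next pending stage. A minimal user-space sketch of that stepping (illustrative C, not part of the patch; ffz() is open-coded):

#include <stdio.h>

enum {
	FSEQ_STARTED   = 1 << 0,
	FSEQ_PREFLUSH  = 1 << 1,
	FSEQ_DATA      = 1 << 2,
	FSEQ_POSTFLUSH = 1 << 3,
	FSEQ_DONE      = 1 << 4,
};

/* open-coded equivalent of 1 << ffz(seq) */
static unsigned cur_seq(unsigned seq)
{
	unsigned bit = 0;

	while (seq & (1u << bit))
		bit++;
	return 1u << bit;
}

int main(void)
{
	/* device with a write-back cache and no FUA: no stage skipped */
	unsigned seq = FSEQ_STARTED;

	while (cur_seq(seq) != FSEQ_DONE) {
		printf("issue stage %#x\n", cur_seq(seq));
		seq |= cur_seq(seq);	/* stage completed */
	}
	return 0;
}

OR-ing a skip mask in up front (as blk_do_flush() does further down) simply makes the walk start at the first stage that is actually needed.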
+ */ + rq->rq_disk = orig_rq->rq_disk; + rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA); + rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA); + rq->end_io = flush_data_end_io; + break; + case QUEUE_FSEQ_POSTFLUSH: + init_flush_request(rq, orig_rq->rq_disk); + rq->end_io = post_flush_end_io; + break; + default: + BUG(); + } + + elv_insert(q, rq, ELEVATOR_INSERT_FRONT); + return rq; +} + +struct request *blk_do_flush(struct request_queue *q, struct request *rq) +{ + unsigned int fflags = q->flush_flags; /* may change, cache it */ + bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA; + bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH); + bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA); + unsigned skip = 0; + + /* + * Special case. If there's data but flush is not necessary, + * the request can be issued directly. + * + * Flush w/o data should be able to be issued directly too but + * currently some drivers assume that rq->bio contains + * non-zero data if it isn't NULL and empty FLUSH requests + * getting here usually have bio's without data. + */ + if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) { + rq->cmd_flags &= ~REQ_FLUSH; + if (!has_fua) + rq->cmd_flags &= ~REQ_FUA; + return rq; + } + + /* + * Sequenced flushes can't be processed in parallel. If + * another one is already in progress, queue for later + * processing. + */ + if (q->flush_seq) { + list_move_tail(&rq->queuelist, &q->pending_flushes); + return NULL; + } + + /* + * Start a new flush sequence + */ + q->flush_err = 0; + q->flush_seq |= QUEUE_FSEQ_STARTED; + + /* adjust FLUSH/FUA of the original request and stash it away */ + rq->cmd_flags &= ~REQ_FLUSH; + if (!has_fua) + rq->cmd_flags &= ~REQ_FUA; + blk_dequeue_request(rq); + q->orig_flush_rq = rq; + + /* skip unneeded sequences and return the first one */ + if (!do_preflush) + skip |= QUEUE_FSEQ_PREFLUSH; + if (!blk_rq_sectors(rq)) + skip |= QUEUE_FSEQ_DATA; + if (!do_postflush) + skip |= QUEUE_FSEQ_POSTFLUSH; + return blk_flush_complete_seq(q, skip, 0); +} + +static void bio_end_flush(struct bio *bio, int err) +{ + if (err) + clear_bit(BIO_UPTODATE, &bio->bi_flags); + if (bio->bi_private) + complete(bio->bi_private); + bio_put(bio); +} + +/** + * blkdev_issue_flush - queue a flush + * @bdev: blockdev to issue flush for + * @gfp_mask: memory allocation flags (for bio_alloc) + * @error_sector: error sector + * + * Description: + * Issue a flush for the block device in question. Caller can supply + * room for storing the error offset in case of a flush error, if they + * wish to. + */ +int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, + sector_t *error_sector) +{ + DECLARE_COMPLETION_ONSTACK(wait); + struct request_queue *q; + struct bio *bio; + int ret = 0; + + if (bdev->bd_disk == NULL) + return -ENXIO; + + q = bdev_get_queue(bdev); + if (!q) + return -ENXIO; + + /* + * Some block devices may not have their queue correctly set up here + * (e.g. loop device without a backing file) and so issuing a flush + * here will panic. Ensure there is a request function before issuing + * the flush.
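For intuition, the do_preflush/do_postflush booleans above decompose a FLUSH+FUA write differently depending on what the device advertises. A stand-alone sketch (the flag values are illustrative stand-ins, not the kernel's):

#include <stdio.h>

#define XREQ_FLUSH (1 << 0)	/* illustrative stand-ins */
#define XREQ_FUA   (1 << 1)

int main(void)
{
	unsigned caps[] = { 0, XREQ_FLUSH, XREQ_FLUSH | XREQ_FUA };
	unsigned rq = XREQ_FLUSH | XREQ_FUA;	/* e.g. a journal commit */
	int i;

	for (i = 0; i < 3; i++) {
		int has_flush = caps[i] & XREQ_FLUSH;
		int has_fua = caps[i] & XREQ_FUA;
		int pre = has_flush && (rq & XREQ_FLUSH);
		int post = has_flush && !has_fua && (rq & XREQ_FUA);

		printf("caps=%u: preflush=%d data(FUA=%d) postflush=%d\n",
		       caps[i], pre,
		       has_fua && (rq & XREQ_FUA) ? 1 : 0, post);
	}
	return 0;
}

A cache-less device strips everything, a cache-only device turns FUA into a post-flush, and a cache+FUA device needs no post-flush at all.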
+ */ + if (!q->make_request_fn) + return -ENXIO; + + bio = bio_alloc(gfp_mask, 0); + bio->bi_end_io = bio_end_flush; + bio->bi_bdev = bdev; + bio->bi_private = &wait; + + bio_get(bio); + submit_bio(WRITE_FLUSH, bio); + wait_for_completion(&wait); + + /* + * The driver must store the error location in ->bi_sector, if + * it supports it. For non-stacked drivers, this should be + * copied from blk_rq_pos(rq). + */ + if (error_sector) + *error_sector = bio->bi_sector; + + if (!bio_flagged(bio, BIO_UPTODATE)) + ret = -EIO; + + bio_put(bio); + return ret; +} +EXPORT_SYMBOL(blkdev_issue_flush); diff --git a/block/blk-integrity.c b/block/blk-integrity.c index edce1ef..885cbb5 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -32,24 +32,37 @@ static struct kmem_cache *integrity_cachep; /** * blk_rq_count_integrity_sg - Count number of integrity scatterlist elements - * @rq: request with integrity metadata attached + * @q: request queue + * @bio: bio with integrity metadata attached * * Description: Returns the number of elements required in a - * scatterlist corresponding to the integrity metadata in a request. + * scatterlist corresponding to the integrity metadata in a bio. */ -int blk_rq_count_integrity_sg(struct request *rq) +int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio) { - struct bio_vec *iv, *ivprv; - struct req_iterator iter; - unsigned int segments; + struct bio_vec *iv, *ivprv = NULL; + unsigned int segments = 0; + unsigned int seg_size = 0; + unsigned int i = 0; - ivprv = NULL; - segments = 0; + bio_for_each_integrity_vec(iv, bio, i) { - rq_for_each_integrity_segment(iv, rq, iter) { + if (ivprv) { + if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv)) + goto new_segment; + + if (!BIOVEC_SEG_BOUNDARY(q, ivprv, iv)) + goto new_segment; + + if (seg_size + iv->bv_len > queue_max_segment_size(q)) + goto new_segment; - if (!ivprv || !BIOVEC_PHYS_MERGEABLE(ivprv, iv)) + seg_size += iv->bv_len; + } else { +new_segment: segments++; + seg_size = iv->bv_len; + } ivprv = iv; } @@ -60,30 +73,34 @@ EXPORT_SYMBOL(blk_rq_count_integrity_sg); /** * blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist - * @rq: request with integrity metadata attached + * @q: request queue + * @bio: bio with integrity metadata attached * @sglist: target scatterlist * * Description: Map the integrity vectors in request into a * scatterlist. The scatterlist must be big enough to hold all * elements. I.e. sized using blk_rq_count_integrity_sg(). 
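The counting loop above is the usual scatterlist coalescing pattern: physically contiguous vectors are merged until a queue limit stops them. Reduced to plain address contiguity plus a size cap (a sketch, not the kernel helpers; BIOVEC_PHYS_MERGEABLE and the segment-boundary test are collapsed into one check):

#include <stdio.h>

struct vec { unsigned long addr; unsigned len; };

/* same shape as blk_rq_count_integrity_sg(): merge contiguous
 * vectors until the per-segment size limit is hit */
static unsigned count_segments(const struct vec *v, int n,
			       unsigned max_seg_size)
{
	const struct vec *prev = NULL;
	unsigned segments = 0, seg_size = 0;
	int i;

	for (i = 0; i < n; i++) {
		if (prev &&
		    prev->addr + prev->len == v[i].addr &&
		    seg_size + v[i].len <= max_seg_size) {
			seg_size += v[i].len;
		} else {
			segments++;
			seg_size = v[i].len;
		}
		prev = &v[i];
	}
	return segments;
}

int main(void)
{
	struct vec v[] = { {0x1000, 512}, {0x1200, 512}, {0x8000, 512} };

	/* first two merge, third starts a new segment: prints 2 */
	printf("%u\n", count_segments(v, 3, 4096));
	return 0;
}

blk_rq_map_integrity_sg() below runs the identical merge decisions, only filling scatterlist entries instead of counting them, which is why it can be sized by the counting pass.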
*/ -int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist) +int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *bio, + struct scatterlist *sglist) { - struct bio_vec *iv, *ivprv; - struct req_iterator iter; - struct scatterlist *sg; - unsigned int segments; + struct bio_vec *iv, *ivprv = NULL; + struct scatterlist *sg = NULL; + unsigned int segments = 0; + unsigned int i = 0; - ivprv = NULL; - sg = NULL; - segments = 0; - - rq_for_each_integrity_segment(iv, rq, iter) { + bio_for_each_integrity_vec(iv, bio, i) { if (ivprv) { if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv)) goto new_segment; + if (!BIOVEC_SEG_BOUNDARY(q, ivprv, iv)) + goto new_segment; + + if (sg->length + iv->bv_len > queue_max_segment_size(q)) + goto new_segment; + sg->length += iv->bv_len; } else { new_segment: @@ -162,6 +179,40 @@ int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2) } EXPORT_SYMBOL(blk_integrity_compare); +int blk_integrity_merge_rq(struct request_queue *q, struct request *req, + struct request *next) +{ + if (blk_integrity_rq(req) != blk_integrity_rq(next)) + return -1; + + if (req->nr_integrity_segments + next->nr_integrity_segments > + q->limits.max_integrity_segments) + return -1; + + return 0; +} +EXPORT_SYMBOL(blk_integrity_merge_rq); + +int blk_integrity_merge_bio(struct request_queue *q, struct request *req, + struct bio *bio) +{ + int nr_integrity_segs; + struct bio *next = bio->bi_next; + + bio->bi_next = NULL; + nr_integrity_segs = blk_rq_count_integrity_sg(q, bio); + bio->bi_next = next; + + if (req->nr_integrity_segments + nr_integrity_segs > + q->limits.max_integrity_segments) + return -1; + + req->nr_integrity_segments += nr_integrity_segs; + + return 0; +} +EXPORT_SYMBOL(blk_integrity_merge_bio); + struct integrity_sysfs_entry { struct attribute attr; ssize_t (*show)(struct blk_integrity *, char *); diff --git a/block/blk-lib.c b/block/blk-lib.c index c392029..1a320d2 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -39,8 +39,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, { DECLARE_COMPLETION_ONSTACK(wait); struct request_queue *q = bdev_get_queue(bdev); - int type = flags & BLKDEV_IFL_BARRIER ? 
- DISCARD_BARRIER : DISCARD_NOBARRIER; + int type = REQ_WRITE | REQ_DISCARD; unsigned int max_discard_sectors; struct bio *bio; int ret = 0; @@ -62,10 +61,10 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, max_discard_sectors &= ~(disc_sects - 1); } - if (flags & BLKDEV_IFL_SECURE) { + if (flags & BLKDEV_DISCARD_SECURE) { if (!blk_queue_secdiscard(q)) return -EOPNOTSUPP; - type |= DISCARD_SECURE; + type |= REQ_SECURE; } while (nr_sects && !ret) { @@ -78,8 +77,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, bio->bi_sector = sector; bio->bi_end_io = blkdev_discard_end_io; bio->bi_bdev = bdev; - if (flags & BLKDEV_IFL_WAIT) - bio->bi_private = &wait; + bio->bi_private = &wait; if (nr_sects > max_discard_sectors) { bio->bi_size = max_discard_sectors << 9; @@ -93,8 +91,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, bio_get(bio); submit_bio(type, bio); - if (flags & BLKDEV_IFL_WAIT) - wait_for_completion(&wait); + wait_for_completion(&wait); if (bio_flagged(bio, BIO_EOPNOTSUPP)) ret = -EOPNOTSUPP; @@ -140,7 +137,6 @@ static void bio_batch_end_io(struct bio *bio, int err) * @sector: start sector * @nr_sects: number of sectors to write * @gfp_mask: memory allocation flags (for bio_alloc) - * @flags: BLKDEV_IFL_* flags to control behaviour * * Description: * Generate and issue number of bios with zerofiled pages. @@ -149,7 +145,7 @@ static void bio_batch_end_io(struct bio *bio, int err) */ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, unsigned long flags) + sector_t nr_sects, gfp_t gfp_mask) { int ret; struct bio *bio; @@ -162,12 +158,6 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, bb.wait = &wait; bb.end_io = NULL; - if (flags & BLKDEV_IFL_BARRIER) { - /* issue async barrier before the data */ - ret = blkdev_issue_flush(bdev, gfp_mask, NULL, 0); - if (ret) - return ret; - } submit: ret = 0; while (nr_sects != 0) { @@ -181,8 +171,7 @@ submit: bio->bi_sector = sector; bio->bi_bdev = bdev; bio->bi_end_io = bio_batch_end_io; - if (flags & BLKDEV_IFL_WAIT) - bio->bi_private = &bb; + bio->bi_private = &bb; while (nr_sects != 0) { sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects); @@ -199,18 +188,10 @@ submit: issued++; submit_bio(WRITE, bio); } - /* - * When all data bios are in flight. Send final barrier if requeted. - */ - if (nr_sects == 0 && flags & BLKDEV_IFL_BARRIER) - ret = blkdev_issue_flush(bdev, gfp_mask, NULL, - flags & BLKDEV_IFL_WAIT); - - if (flags & BLKDEV_IFL_WAIT) - /* Wait for bios in-flight */ - while ( issued != atomic_read(&bb.done)) - wait_for_completion(&wait); + /* Wait for bios in-flight */ + while (issued != atomic_read(&bb.done)) + wait_for_completion(&wait); if (!test_bit(BIO_UPTODATE, &bb.flags)) /* One of bios in the batch was completed with error.*/ diff --git a/block/blk-map.c b/block/blk-map.c index c65d759..d4a586d 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -54,7 +54,7 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, * direct dma. 
else, set up kernel bounce buffers */ uaddr = (unsigned long) ubuf; - if (blk_rq_aligned(q, ubuf, len) && !map_data) + if (blk_rq_aligned(q, uaddr, len) && !map_data) bio = bio_map_user(q, NULL, uaddr, len, reading, gfp_mask); else bio = bio_copy_user(q, map_data, uaddr, len, reading, gfp_mask); @@ -288,6 +288,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, unsigned int len, gfp_t gfp_mask) { int reading = rq_data_dir(rq) == READ; + unsigned long addr = (unsigned long) kbuf; int do_copy = 0; struct bio *bio; int ret; @@ -297,7 +298,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, if (!len || !kbuf) return -EINVAL; - do_copy = !blk_rq_aligned(q, kbuf, len) || object_is_on_stack(kbuf); + do_copy = !blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf); if (do_copy) bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading); else @@ -307,7 +308,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, return PTR_ERR(bio); if (rq_data_dir(rq) == WRITE) - bio->bi_rw |= (1 << REQ_WRITE); + bio->bi_rw |= REQ_WRITE; if (do_copy) rq->cmd_flags |= REQ_COPY_USER; diff --git a/block/blk-merge.c b/block/blk-merge.c index 3b0cd42..6a72546 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -205,12 +205,11 @@ static inline int ll_new_hw_segment(struct request_queue *q, { int nr_phys_segs = bio_phys_segments(q, bio); - if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q)) { - req->cmd_flags |= REQ_NOMERGE; - if (req == q->last_merge) - q->last_merge = NULL; - return 0; - } + if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q)) + goto no_merge; + + if (bio_integrity(bio) && blk_integrity_merge_bio(q, req, bio)) + goto no_merge; /* * This will form the start of a new hw segment. Bump both @@ -218,6 +217,12 @@ static inline int ll_new_hw_segment(struct request_queue *q, */ req->nr_phys_segments += nr_phys_segs; return 1; + +no_merge: + req->cmd_flags |= REQ_NOMERGE; + if (req == q->last_merge) + q->last_merge = NULL; + return 0; } int ll_back_merge_fn(struct request_queue *q, struct request *req, @@ -301,6 +306,9 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, if (total_phys_segments > queue_max_segments(q)) return 0; + if (blk_integrity_rq(req) && blk_integrity_merge_rq(q, req, next)) + return 0; + /* Merge is OK... */ req->nr_phys_segments = total_phys_segments; return 1; @@ -372,9 +380,6 @@ static int attempt_merge(struct request_queue *q, struct request *req, || next->special) return 0; - if (blk_integrity_rq(req) != blk_integrity_rq(next)) - return 0; - /* * If we are allowed to merge, then append bio list * from next to rq and release next. 
merge_requests_fn diff --git a/block/blk-settings.c b/block/blk-settings.c index a234f4b..567b76f 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -111,6 +111,7 @@ EXPORT_SYMBOL_GPL(blk_queue_lld_busy); void blk_set_default_limits(struct queue_limits *lim) { lim->max_segments = BLK_MAX_SEGMENTS; + lim->max_integrity_segments = 0; lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; lim->max_sectors = BLK_DEF_MAX_SECTORS; @@ -213,16 +214,14 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask) */ if (b_pfn < (min_t(u64, 0xffffffffUL, BLK_BOUNCE_HIGH) >> PAGE_SHIFT)) dma = 1; - q->limits.bounce_pfn = max_low_pfn; #else if (b_pfn < blk_max_low_pfn) dma = 1; - q->limits.bounce_pfn = b_pfn; #endif + q->limits.bounce_pfn = b_pfn; if (dma) { init_emergency_isa_pool(); q->bounce_gfp = GFP_NOIO | GFP_DMA; - q->limits.bounce_pfn = b_pfn; } } EXPORT_SYMBOL(blk_queue_bounce_limit); @@ -455,11 +454,6 @@ void blk_queue_io_opt(struct request_queue *q, unsigned int opt) } EXPORT_SYMBOL(blk_queue_io_opt); -/* - * Returns the minimum that is _not_ zero, unless both are zero. - */ -#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) - /** * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers * @t: the stacking driver (top) @@ -514,6 +508,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, b->seg_boundary_mask); t->max_segments = min_not_zero(t->max_segments, b->max_segments); + t->max_integrity_segments = min_not_zero(t->max_integrity_segments, + b->max_integrity_segments); t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size); @@ -794,6 +790,26 @@ void blk_queue_update_dma_alignment(struct request_queue *q, int mask) } EXPORT_SYMBOL(blk_queue_update_dma_alignment); +/** + * blk_queue_flush - configure queue's cache flush capability + * @q: the request queue for the device + * @flush: 0, REQ_FLUSH or REQ_FLUSH | REQ_FUA + * + * Tell block layer cache flush capability of @q. If it supports + * flushing, REQ_FLUSH should be set. If it supports bypassing + * write cache for individual writes, REQ_FUA should be set. 
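Concretely, a driver would advertise its cache behaviour once at queue setup time. A hypothetical probe-path snippet (not from this patch), assuming a device with a volatile write-back cache and native FUA support:

	blk_queue_flush(q, REQ_FLUSH | REQ_FUA);	/* cache + FUA */

	/* a write-through device would pass 0 instead and never see
	 * REQ_FLUSH or REQ_FUA requests from the block layer */

The WARN_ON_ONCE checks in the function body below enforce the one invalid combination: FUA without FLUSH makes no sense, since a device that cannot flush its cache cannot meaningfully bypass it either.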
+ */ +void blk_queue_flush(struct request_queue *q, unsigned int flush) +{ + WARN_ON_ONCE(flush & ~(REQ_FLUSH | REQ_FUA)); + + if (WARN_ON_ONCE(!(flush & REQ_FLUSH) && (flush & REQ_FUA))) + flush &= ~REQ_FUA; + + q->flush_flags = flush & (REQ_FLUSH | REQ_FUA); +} +EXPORT_SYMBOL_GPL(blk_queue_flush); + static int __init blk_settings_init(void) { blk_max_low_pfn = max_low_pfn - 1; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 001ab18..da8a8a4 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -112,6 +112,11 @@ static ssize_t queue_max_segments_show(struct request_queue *q, char *page) return queue_var_show(queue_max_segments(q), (page)); } +static ssize_t queue_max_integrity_segments_show(struct request_queue *q, char *page) +{ + return queue_var_show(q->limits.max_integrity_segments, (page)); +} + static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page) { if (test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags)) @@ -288,6 +293,11 @@ static struct queue_sysfs_entry queue_max_segments_entry = { .show = queue_max_segments_show, }; +static struct queue_sysfs_entry queue_max_integrity_segments_entry = { + .attr = {.name = "max_integrity_segments", .mode = S_IRUGO }, + .show = queue_max_integrity_segments_show, +}; + static struct queue_sysfs_entry queue_max_segment_size_entry = { .attr = {.name = "max_segment_size", .mode = S_IRUGO }, .show = queue_max_segment_size_show, @@ -375,6 +385,7 @@ static struct attribute *default_attrs[] = { &queue_max_hw_sectors_entry.attr, &queue_max_sectors_entry.attr, &queue_max_segments_entry.attr, + &queue_max_integrity_segments_entry.attr, &queue_max_segment_size_entry.attr, &queue_iosched_entry.attr, &queue_hw_sector_size_entry.attr, @@ -511,6 +522,7 @@ int blk_register_queue(struct gendisk *disk) kobject_uevent(&q->kobj, KOBJ_REMOVE); kobject_del(&q->kobj); blk_trace_remove_sysfs(disk_to_dev(disk)); + kobject_put(&dev->kobj); return ret; } diff --git a/block/blk-throttle.c b/block/blk-throttle.c new file mode 100644 index 0000000..af53f37 --- /dev/null +++ b/block/blk-throttle.c @@ -0,0 +1,999 @@ +/* + * Interface for controlling IO bandwidth on a request queue + * + * Copyright (C) 2010 Vivek Goyal + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/blkdev.h> +#include <linux/bio.h> +#include <linux/blktrace_api.h> +#include "blk-cgroup.h" + +/* Max dispatch from a group in 1 round */ +static int throtl_grp_quantum = 8; + +/* Total max dispatch from all groups in one round */ +static int throtl_quantum = 32; + +/* Throttling is performed over 100ms slice and after that slice is renewed */ +static unsigned long throtl_slice = HZ/10; /* 100 ms */ + +struct throtl_rb_root { + struct rb_root rb; + struct rb_node *left; + unsigned int count; + unsigned long min_disptime; +}; + +#define THROTL_RB_ROOT (struct throtl_rb_root) { .rb = RB_ROOT, .left = NULL, \ + .count = 0, .min_disptime = 0} + +#define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node) + +struct throtl_grp { + /* List of throtl groups on the request queue */ + struct hlist_node tg_node; + + /* active throtl group service_tree member */ + struct rb_node rb_node; + + /* + * Dispatch time in jiffies. This is the estimated time when group + * will unthrottle and is ready to dispatch more bios. It is used as + * key to sort active groups in service tree.
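The service tree is an earliest-disptime-first structure: the leftmost (smallest-key) group is the next one allowed to dispatch. Flattened into an array scan for illustration (a sketch of the selection rule, not the rb-tree code):

#include <stdio.h>

struct grp { const char *name; unsigned long disptime; };

/* stand-in for throtl_rb_first(): leftmost == smallest key */
static struct grp *pick_next(struct grp *g, int n)
{
	struct grp *best = NULL;
	int i;

	for (i = 0; i < n; i++)
		if (!best || g[i].disptime < best->disptime)
			best = &g[i];
	return best;
}

int main(void)
{
	struct grp g[] = { {"A", 130}, {"B", 105}, {"C", 220} };

	printf("dispatch %s first\n", pick_next(g, 3)->name); /* B */
	return 0;
}

The cached ->left pointer in throtl_rb_root above just memoizes that leftmost node so repeated lookups stay O(1).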
+ */ + unsigned long disptime; + + struct blkio_group blkg; + atomic_t ref; + unsigned int flags; + + /* Two lists for READ and WRITE */ + struct bio_list bio_lists[2]; + + /* Number of queued bios on READ and WRITE lists */ + unsigned int nr_queued[2]; + + /* bytes per second rate limits */ + uint64_t bps[2]; + + /* IOPS limits */ + unsigned int iops[2]; + + /* Number of bytes dispatched in current slice */ + uint64_t bytes_disp[2]; + /* Number of bios dispatched in current slice */ + unsigned int io_disp[2]; + + /* When did we start a new slice */ + unsigned long slice_start[2]; + unsigned long slice_end[2]; +}; + +struct throtl_data +{ + /* List of throtl groups */ + struct hlist_head tg_list; + + /* service tree for active throtl groups */ + struct throtl_rb_root tg_service_tree; + + struct throtl_grp root_tg; + struct request_queue *queue; + + /* Total Number of queued bios on READ and WRITE lists */ + unsigned int nr_queued[2]; + + /* + * number of total undestroyed groups (excluding root group) + */ + unsigned int nr_undestroyed_grps; + + /* Work for dispatching throttled bios */ + struct delayed_work throtl_work; +}; + +enum tg_state_flags { + THROTL_TG_FLAG_on_rr = 0, /* on round-robin busy list */ +}; + +#define THROTL_TG_FNS(name) \ +static inline void throtl_mark_tg_##name(struct throtl_grp *tg) \ +{ \ + (tg)->flags |= (1 << THROTL_TG_FLAG_##name); \ +} \ +static inline void throtl_clear_tg_##name(struct throtl_grp *tg) \ +{ \ + (tg)->flags &= ~(1 << THROTL_TG_FLAG_##name); \ +} \ +static inline int throtl_tg_##name(const struct throtl_grp *tg) \ +{ \ + return ((tg)->flags & (1 << THROTL_TG_FLAG_##name)) != 0; \ +} + +THROTL_TG_FNS(on_rr); + +#define throtl_log_tg(td, tg, fmt, args...) \ + blk_add_trace_msg((td)->queue, "throtl %s " fmt, \ + blkg_path(&(tg)->blkg), ##args); \ + +#define throtl_log(td, fmt, args...) \ + blk_add_trace_msg((td)->queue, "throtl " fmt, ##args) + +static inline struct throtl_grp *tg_of_blkg(struct blkio_group *blkg) +{ + if (blkg) + return container_of(blkg, struct throtl_grp, blkg); + + return NULL; +} + +static inline int total_nr_queued(struct throtl_data *td) +{ + return (td->nr_queued[0] + td->nr_queued[1]); +} + +static inline struct throtl_grp *throtl_ref_get_tg(struct throtl_grp *tg) +{ + atomic_inc(&tg->ref); + return tg; +} + +static void throtl_put_tg(struct throtl_grp *tg) +{ + BUG_ON(atomic_read(&tg->ref) <= 0); + if (!atomic_dec_and_test(&tg->ref)) + return; + kfree(tg); +} + +static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td, + struct cgroup *cgroup) +{ + struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup); + struct throtl_grp *tg = NULL; + void *key = td; + struct backing_dev_info *bdi = &td->queue->backing_dev_info; + unsigned int major, minor; + + /* + * TODO: Speed up blkiocg_lookup_group() by maintaining a radix + * tree of blkg (instead of traversing through hash list all + * the time).
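Given the bps limits and per-slice counters above, the admission test that tg_with_in_bps_limit() implements further down reduces to "bytes already dispatched versus bytes allowed in the rounded-up elapsed slice time". A reduced model, assuming HZ=1000 for readability (the kernel code goes through jiffies_to_msecs() and MSEC_PER_SEC instead):

#include <stdio.h>

#define HZ		1000
#define THROTL_SLICE	(HZ / 10)	/* 100 ms, as above */

int main(void)
{
	unsigned long long bps = 1024 * 1024;	/* 1 MiB/s limit */
	unsigned long long bytes_disp = 200 * 1024;
	unsigned long jiffy_elapsed = 30;	/* 30 ms into the slice */

	/* round elapsed time up to a full slice, as the code does */
	unsigned long rnd = ((jiffy_elapsed + THROTL_SLICE - 1)
			     / THROTL_SLICE) * THROTL_SLICE;
	unsigned long long allowed = bps * rnd / HZ;

	if (bytes_disp + 4096 <= allowed)
		printf("dispatch (allowed=%llu)\n", allowed);
	else
		printf("throttle (allowed=%llu, disp=%llu)\n",
		       allowed, bytes_disp);
	return 0;
}

Here 200 KiB have already gone out but only ~102 KiB are allowed 30 ms into the slice, so the bio waits; the rounding up to a whole slice is what lets a group burst briefly at slice start without exceeding the rate on average.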
+ */ + tg = tg_of_blkg(blkiocg_lookup_group(blkcg, key)); + + /* Fill in device details for root group */ + if (tg && !tg->blkg.dev && bdi->dev && dev_name(bdi->dev)) { + sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); + tg->blkg.dev = MKDEV(major, minor); + goto done; + } + + if (tg) + goto done; + + tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, td->queue->node); + if (!tg) + goto done; + + INIT_HLIST_NODE(&tg->tg_node); + RB_CLEAR_NODE(&tg->rb_node); + bio_list_init(&tg->bio_lists[0]); + bio_list_init(&tg->bio_lists[1]); + + /* + * Take the initial reference that will be released on destroy + * This can be thought of a joint reference by cgroup and + * request queue which will be dropped by either request queue + * exit or cgroup deletion path depending on who is exiting first. + */ + atomic_set(&tg->ref, 1); + + /* Add group onto cgroup list */ + sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); + blkiocg_add_blkio_group(blkcg, &tg->blkg, (void *)td, + MKDEV(major, minor), BLKIO_POLICY_THROTL); + + tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev); + tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev); + tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev); + tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev); + + hlist_add_head(&tg->tg_node, &td->tg_list); + td->nr_undestroyed_grps++; +done: + return tg; +} + +static struct throtl_grp * throtl_get_tg(struct throtl_data *td) +{ + struct cgroup *cgroup; + struct throtl_grp *tg = NULL; + + rcu_read_lock(); + cgroup = task_cgroup(current, blkio_subsys_id); + tg = throtl_find_alloc_tg(td, cgroup); + if (!tg) + tg = &td->root_tg; + rcu_read_unlock(); + return tg; +} + +static struct throtl_grp *throtl_rb_first(struct throtl_rb_root *root) +{ + /* Service tree is empty */ + if (!root->count) + return NULL; + + if (!root->left) + root->left = rb_first(&root->rb); + + if (root->left) + return rb_entry_tg(root->left); + + return NULL; +} + +static void rb_erase_init(struct rb_node *n, struct rb_root *root) +{ + rb_erase(n, root); + RB_CLEAR_NODE(n); +} + +static void throtl_rb_erase(struct rb_node *n, struct throtl_rb_root *root) +{ + if (root->left == n) + root->left = NULL; + rb_erase_init(n, &root->rb); + --root->count; +} + +static void update_min_dispatch_time(struct throtl_rb_root *st) +{ + struct throtl_grp *tg; + + tg = throtl_rb_first(st); + if (!tg) + return; + + st->min_disptime = tg->disptime; +} + +static void +tg_service_tree_add(struct throtl_rb_root *st, struct throtl_grp *tg) +{ + struct rb_node **node = &st->rb.rb_node; + struct rb_node *parent = NULL; + struct throtl_grp *__tg; + unsigned long key = tg->disptime; + int left = 1; + + while (*node != NULL) { + parent = *node; + __tg = rb_entry_tg(parent); + + if (time_before(key, __tg->disptime)) + node = &parent->rb_left; + else { + node = &parent->rb_right; + left = 0; + } + } + + if (left) + st->left = &tg->rb_node; + + rb_link_node(&tg->rb_node, parent, node); + rb_insert_color(&tg->rb_node, &st->rb); +} + +static void __throtl_enqueue_tg(struct throtl_data *td, struct throtl_grp *tg) +{ + struct throtl_rb_root *st = &td->tg_service_tree; + + tg_service_tree_add(st, tg); + throtl_mark_tg_on_rr(tg); + st->count++; +} + +static void throtl_enqueue_tg(struct throtl_data *td, struct throtl_grp *tg) +{ + if (!throtl_tg_on_rr(tg)) + __throtl_enqueue_tg(td, tg); +} + +static void __throtl_dequeue_tg(struct throtl_data *td, struct throtl_grp *tg) +{ + throtl_rb_erase(&tg->rb_node, &td->tg_service_tree); + throtl_clear_tg_on_rr(tg); +} + +static void 
throtl_dequeue_tg(struct throtl_data *td, struct throtl_grp *tg) +{ + if (throtl_tg_on_rr(tg)) + __throtl_dequeue_tg(td, tg); +} + +static void throtl_schedule_next_dispatch(struct throtl_data *td) +{ + struct throtl_rb_root *st = &td->tg_service_tree; + + /* + * If there are more bios pending, schedule more work. + */ + if (!total_nr_queued(td)) + return; + + BUG_ON(!st->count); + + update_min_dispatch_time(st); + + if (time_before_eq(st->min_disptime, jiffies)) + throtl_schedule_delayed_work(td->queue, 0); + else + throtl_schedule_delayed_work(td->queue, + (st->min_disptime - jiffies)); +} + +static inline void +throtl_start_new_slice(struct throtl_data *td, struct throtl_grp *tg, bool rw) +{ + tg->bytes_disp[rw] = 0; + tg->io_disp[rw] = 0; + tg->slice_start[rw] = jiffies; + tg->slice_end[rw] = jiffies + throtl_slice; + throtl_log_tg(td, tg, "[%c] new slice start=%lu end=%lu jiffies=%lu", + rw == READ ? 'R' : 'W', tg->slice_start[rw], + tg->slice_end[rw], jiffies); +} + +static inline void throtl_extend_slice(struct throtl_data *td, + struct throtl_grp *tg, bool rw, unsigned long jiffy_end) +{ + tg->slice_end[rw] = roundup(jiffy_end, throtl_slice); + throtl_log_tg(td, tg, "[%c] extend slice start=%lu end=%lu jiffies=%lu", + rw == READ ? 'R' : 'W', tg->slice_start[rw], + tg->slice_end[rw], jiffies); +} + +/* Determine if previously allocated or extended slice is complete or not */ +static bool +throtl_slice_used(struct throtl_data *td, struct throtl_grp *tg, bool rw) +{ + if (time_in_range(jiffies, tg->slice_start[rw], tg->slice_end[rw])) + return 0; + + return 1; +} + +/* Trim the used slices and adjust slice start accordingly */ +static inline void +throtl_trim_slice(struct throtl_data *td, struct throtl_grp *tg, bool rw) +{ + unsigned long nr_slices, bytes_trim, time_elapsed, io_trim; + + BUG_ON(time_before(tg->slice_end[rw], tg->slice_start[rw])); + + /* + * If bps are unlimited (-1), then time slice don't get + * renewed. Don't try to trim the slice if slice is used. A new + * slice will start when appropriate. + */ + if (throtl_slice_used(td, tg, rw)) + return; + + time_elapsed = jiffies - tg->slice_start[rw]; + + nr_slices = time_elapsed / throtl_slice; + + if (!nr_slices) + return; + + bytes_trim = (tg->bps[rw] * throtl_slice * nr_slices)/HZ; + io_trim = (tg->iops[rw] * throtl_slice * nr_slices)/HZ; + + if (!bytes_trim && !io_trim) + return; + + if (tg->bytes_disp[rw] >= bytes_trim) + tg->bytes_disp[rw] -= bytes_trim; + else + tg->bytes_disp[rw] = 0; + + if (tg->io_disp[rw] >= io_trim) + tg->io_disp[rw] -= io_trim; + else + tg->io_disp[rw] = 0; + + tg->slice_start[rw] += nr_slices * throtl_slice; + + throtl_log_tg(td, tg, "[%c] trim slice nr=%lu bytes=%lu io=%lu" + " start=%lu end=%lu jiffies=%lu", + rw == READ ? 'R' : 'W', nr_slices, bytes_trim, io_trim, + tg->slice_start[rw], tg->slice_end[rw], jiffies); +} + +static bool tg_with_in_iops_limit(struct throtl_data *td, struct throtl_grp *tg, + struct bio *bio, unsigned long *wait) +{ + bool rw = bio_data_dir(bio); + unsigned int io_allowed; + unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd; + + jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw]; + + /* Slice has just started. 
Consider one slice interval */ + if (!jiffy_elapsed) + jiffy_elapsed_rnd = throtl_slice; + + jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, throtl_slice); + + io_allowed = (tg->iops[rw] * jiffies_to_msecs(jiffy_elapsed_rnd)) + / MSEC_PER_SEC; + + if (tg->io_disp[rw] + 1 <= io_allowed) { + if (wait) + *wait = 0; + return 1; + } + + /* Calc approx time to dispatch */ + jiffy_wait = ((tg->io_disp[rw] + 1) * HZ)/tg->iops[rw] + 1; + + if (jiffy_wait > jiffy_elapsed) + jiffy_wait = jiffy_wait - jiffy_elapsed; + else + jiffy_wait = 1; + + if (wait) + *wait = jiffy_wait; + return 0; +} + +static bool tg_with_in_bps_limit(struct throtl_data *td, struct throtl_grp *tg, + struct bio *bio, unsigned long *wait) +{ + bool rw = bio_data_dir(bio); + u64 bytes_allowed, extra_bytes; + unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd; + + jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw]; + + /* Slice has just started. Consider one slice interval */ + if (!jiffy_elapsed) + jiffy_elapsed_rnd = throtl_slice; + + jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, throtl_slice); + + bytes_allowed = (tg->bps[rw] * jiffies_to_msecs(jiffy_elapsed_rnd)) + / MSEC_PER_SEC; + + if (tg->bytes_disp[rw] + bio->bi_size <= bytes_allowed) { + if (wait) + *wait = 0; + return 1; + } + + /* Calc approx time to dispatch */ + extra_bytes = tg->bytes_disp[rw] + bio->bi_size - bytes_allowed; + jiffy_wait = div64_u64(extra_bytes * HZ, tg->bps[rw]); + + if (!jiffy_wait) + jiffy_wait = 1; + + /* + * This wait time is without taking into consideration the rounding + * up we did. Add that time also. + */ + jiffy_wait = jiffy_wait + (jiffy_elapsed_rnd - jiffy_elapsed); + if (wait) + *wait = jiffy_wait; + return 0; +} + +/* + * Returns whether one can dispatch a bio or not. Also returns approx number + * of jiffies to wait before this bio is with-in IO rate and can be dispatched + */ +static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg, + struct bio *bio, unsigned long *wait) +{ + bool rw = bio_data_dir(bio); + unsigned long bps_wait = 0, iops_wait = 0, max_wait = 0; + + /* + * Currently whole state machine of group depends on first bio + * queued in the group bio list. So one should not be calling + * this function with a different bio if there are other bios + * queued. + */ + BUG_ON(tg->nr_queued[rw] && bio != bio_list_peek(&tg->bio_lists[rw])); + + /* If tg->bps = -1, then BW is unlimited */ + if (tg->bps[rw] == -1 && tg->iops[rw] == -1) { + if (wait) + *wait = 0; + return 1; + } + + /* + * If previous slice expired, start a new one otherwise renew/extend + * existing slice to make sure it is at least throtl_slice interval + * long since now. + */ + if (throtl_slice_used(td, tg, rw)) + throtl_start_new_slice(td, tg, rw); + else { + if (time_before(tg->slice_end[rw], jiffies + throtl_slice)) + throtl_extend_slice(td, tg, rw, jiffies + throtl_slice); + } + + if (tg_with_in_bps_limit(td, tg, bio, &bps_wait) + && tg_with_in_iops_limit(td, tg, bio, &iops_wait)) { + if (wait) + *wait = 0; + return 1; + } + + max_wait = max(bps_wait, iops_wait); + + if (wait) + *wait = max_wait; + + if (time_before(tg->slice_end[rw], jiffies + max_wait)) + throtl_extend_slice(td, tg, rw, jiffies + max_wait); + + return 0; +} + +static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) +{ + bool rw = bio_data_dir(bio); + bool sync = bio->bi_rw & REQ_SYNC; + + /* Charge the bio to the group */ + tg->bytes_disp[rw] += bio->bi_size; + tg->io_disp[rw]++; + + /* + * TODO: This will take blkg->stats_lock. 
Figure out a way + * to avoid this cost. + */ + blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, rw, sync); +} + +static void throtl_add_bio_tg(struct throtl_data *td, struct throtl_grp *tg, + struct bio *bio) +{ + bool rw = bio_data_dir(bio); + + bio_list_add(&tg->bio_lists[rw], bio); + /* Take a bio reference on tg */ + throtl_ref_get_tg(tg); + tg->nr_queued[rw]++; + td->nr_queued[rw]++; + throtl_enqueue_tg(td, tg); +} + +static void tg_update_disptime(struct throtl_data *td, struct throtl_grp *tg) +{ + unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime; + struct bio *bio; + + if ((bio = bio_list_peek(&tg->bio_lists[READ]))) + tg_may_dispatch(td, tg, bio, &read_wait); + + if ((bio = bio_list_peek(&tg->bio_lists[WRITE]))) + tg_may_dispatch(td, tg, bio, &write_wait); + + min_wait = min(read_wait, write_wait); + disptime = jiffies + min_wait; + + /* + * If group is already on active tree, then update dispatch time + * only if it is lesser than existing dispatch time. Otherwise + * always update the dispatch time + */ + + if (throtl_tg_on_rr(tg) && time_before(disptime, tg->disptime)) + return; + + /* Update dispatch time */ + throtl_dequeue_tg(td, tg); + tg->disptime = disptime; + throtl_enqueue_tg(td, tg); +} + +static void tg_dispatch_one_bio(struct throtl_data *td, struct throtl_grp *tg, + bool rw, struct bio_list *bl) +{ + struct bio *bio; + + bio = bio_list_pop(&tg->bio_lists[rw]); + tg->nr_queued[rw]--; + /* Drop bio reference on tg */ + throtl_put_tg(tg); + + BUG_ON(td->nr_queued[rw] <= 0); + td->nr_queued[rw]--; + + throtl_charge_bio(tg, bio); + bio_list_add(bl, bio); + bio->bi_rw |= REQ_THROTTLED; + + throtl_trim_slice(td, tg, rw); +} + +static int throtl_dispatch_tg(struct throtl_data *td, struct throtl_grp *tg, + struct bio_list *bl) +{ + unsigned int nr_reads = 0, nr_writes = 0; + unsigned int max_nr_reads = throtl_grp_quantum*3/4; + unsigned int max_nr_writes = throtl_grp_quantum - nr_reads; + struct bio *bio; + + /* Try to dispatch 75% READS and 25% WRITES */ + + while ((bio = bio_list_peek(&tg->bio_lists[READ])) + && tg_may_dispatch(td, tg, bio, NULL)) { + + tg_dispatch_one_bio(td, tg, bio_data_dir(bio), bl); + nr_reads++; + + if (nr_reads >= max_nr_reads) + break; + } + + while ((bio = bio_list_peek(&tg->bio_lists[WRITE])) + && tg_may_dispatch(td, tg, bio, NULL)) { + + tg_dispatch_one_bio(td, tg, bio_data_dir(bio), bl); + nr_writes++; + + if (nr_writes >= max_nr_writes) + break; + } + + return nr_reads + nr_writes; +} + +static int throtl_select_dispatch(struct throtl_data *td, struct bio_list *bl) +{ + unsigned int nr_disp = 0; + struct throtl_grp *tg; + struct throtl_rb_root *st = &td->tg_service_tree; + + while (1) { + tg = throtl_rb_first(st); + + if (!tg) + break; + + if (time_before(jiffies, tg->disptime)) + break; + + throtl_dequeue_tg(td, tg); + + nr_disp += throtl_dispatch_tg(td, tg, bl); + + if (tg->nr_queued[0] || tg->nr_queued[1]) { + tg_update_disptime(td, tg); + throtl_enqueue_tg(td, tg); + } + + if (nr_disp >= throtl_quantum) + break; + } + + return nr_disp; +} + +/* Dispatch throttled bios. Should be called without queue lock held. 
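A note on throtl_dispatch_tg() above: max_nr_writes is initialized from nr_reads while nr_reads is still zero, so a round appears to be capped at 6 reads and up to 8 writes rather than the 6/2 split the "75% READS and 25% WRITES" comment suggests. A two-line check of the arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned int quantum = 8, nr_reads = 0;
	unsigned int max_nr_reads = quantum * 3 / 4;	 /* 6 */
	unsigned int max_nr_writes = quantum - nr_reads; /* 8, not 2 */

	printf("reads<=%u writes<=%u per round\n",
	       max_nr_reads, max_nr_writes);
	return 0;
}

The read loop still runs first, so reads keep priority; only the write cap is looser than the comment implies.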
*/ +static int throtl_dispatch(struct request_queue *q) +{ + struct throtl_data *td = q->td; + unsigned int nr_disp = 0; + struct bio_list bio_list_on_stack; + struct bio *bio; + + spin_lock_irq(q->queue_lock); + + if (!total_nr_queued(td)) + goto out; + + bio_list_init(&bio_list_on_stack); + + throtl_log(td, "dispatch nr_queued=%lu read=%u write=%u", + total_nr_queued(td), td->nr_queued[READ], + td->nr_queued[WRITE]); + + nr_disp = throtl_select_dispatch(td, &bio_list_on_stack); + + if (nr_disp) + throtl_log(td, "bios disp=%u", nr_disp); + + throtl_schedule_next_dispatch(td); +out: + spin_unlock_irq(q->queue_lock); + + /* + * If we dispatched some requests, unplug the queue to make sure + * they are dispatched immediately + */ + if (nr_disp) { + while((bio = bio_list_pop(&bio_list_on_stack))) + generic_make_request(bio); + blk_unplug(q); + } + return nr_disp; +} + +void blk_throtl_work(struct work_struct *work) +{ + struct throtl_data *td = container_of(work, struct throtl_data, + throtl_work.work); + struct request_queue *q = td->queue; + + throtl_dispatch(q); +} + +/* Call with queue lock held */ +void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay) +{ + + struct throtl_data *td = q->td; + struct delayed_work *dwork = &td->throtl_work; + + if (total_nr_queued(td) > 0) { + /* + * We might have a work item scheduled to run in the future. + * Cancel that and schedule a new one. + */ + __cancel_delayed_work(dwork); + kblockd_schedule_delayed_work(q, dwork, delay); + throtl_log(td, "schedule work. delay=%lu jiffies=%lu", + delay, jiffies); + } +} +EXPORT_SYMBOL(throtl_schedule_delayed_work); + +static void +throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg) +{ + /* Something wrong if we are trying to remove same group twice */ + BUG_ON(hlist_unhashed(&tg->tg_node)); + + hlist_del_init(&tg->tg_node); + + /* + * Put the reference taken at the time of creation so that when all + * queues are gone, group can be destroyed. + */ + throtl_put_tg(tg); + td->nr_undestroyed_grps--; +} + +static void throtl_release_tgs(struct throtl_data *td) +{ + struct hlist_node *pos, *n; + struct throtl_grp *tg; + + hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) { + /* + * If cgroup removal path got to blk_group first and removed + * it from cgroup list, then it will take care of destroying + * the group too. + */ + if (!blkiocg_del_blkio_group(&tg->blkg)) + throtl_destroy_tg(td, tg); + } +} + +static void throtl_td_free(struct throtl_data *td) +{ + kfree(td); +} + +/* + * Blk cgroup controller notification saying that blkio_group object is being + * delinked as associated cgroup object is going away. That also means that + * no new IO will come into this group. So get rid of this group as soon as + * any pending IO in the group is finished. + * + * This function is called under rcu_read_lock(). key is the rcu protected + * pointer. That means "key" is a valid throtl_data pointer as long as we hold + * the rcu read lock. + * + * "key" was fetched from blkio_group under blkio_cgroup->lock. That means + * it should not be NULL as even if queue was going away, cgroup deletion + * path got to it first.
+ */ +void throtl_unlink_blkio_group(void *key, struct blkio_group *blkg) +{ + unsigned long flags; + struct throtl_data *td = key; + + spin_lock_irqsave(td->queue->queue_lock, flags); + throtl_destroy_tg(td, tg_of_blkg(blkg)); + spin_unlock_irqrestore(td->queue->queue_lock, flags); +} + +static void throtl_update_blkio_group_read_bps (struct blkio_group *blkg, + u64 read_bps) +{ + tg_of_blkg(blkg)->bps[READ] = read_bps; +} + +static void throtl_update_blkio_group_write_bps (struct blkio_group *blkg, + u64 write_bps) +{ + tg_of_blkg(blkg)->bps[WRITE] = write_bps; +} + +static void throtl_update_blkio_group_read_iops (struct blkio_group *blkg, + unsigned int read_iops) +{ + tg_of_blkg(blkg)->iops[READ] = read_iops; +} + +static void throtl_update_blkio_group_write_iops (struct blkio_group *blkg, + unsigned int write_iops) +{ + tg_of_blkg(blkg)->iops[WRITE] = write_iops; +} + +void throtl_shutdown_timer_wq(struct request_queue *q) +{ + struct throtl_data *td = q->td; + + cancel_delayed_work_sync(&td->throtl_work); +} + +static struct blkio_policy_type blkio_policy_throtl = { + .ops = { + .blkio_unlink_group_fn = throtl_unlink_blkio_group, + .blkio_update_group_read_bps_fn = + throtl_update_blkio_group_read_bps, + .blkio_update_group_write_bps_fn = + throtl_update_blkio_group_write_bps, + .blkio_update_group_read_iops_fn = + throtl_update_blkio_group_read_iops, + .blkio_update_group_write_iops_fn = + throtl_update_blkio_group_write_iops, + }, + .plid = BLKIO_POLICY_THROTL, +}; + +int blk_throtl_bio(struct request_queue *q, struct bio **biop) +{ + struct throtl_data *td = q->td; + struct throtl_grp *tg; + struct bio *bio = *biop; + bool rw = bio_data_dir(bio), update_disptime = true; + + if (bio->bi_rw & REQ_THROTTLED) { + bio->bi_rw &= ~REQ_THROTTLED; + return 0; + } + + spin_lock_irq(q->queue_lock); + tg = throtl_get_tg(td); + + if (tg->nr_queued[rw]) { + /* + * There is already another bio queued in same dir. No + * need to update dispatch time. + */ + update_disptime = false; + goto queue_bio; + } + + /* Bio is with-in rate limit of group */ + if (tg_may_dispatch(td, tg, bio, NULL)) { + throtl_charge_bio(tg, bio); + goto out; + } + +queue_bio: + throtl_log_tg(td, tg, "[%c] bio. bdisp=%u sz=%u bps=%llu" + " iodisp=%u iops=%u queued=%d/%d", + rw == READ ? 
'R' : 'W', + tg->bytes_disp[rw], bio->bi_size, tg->bps[rw], + tg->io_disp[rw], tg->iops[rw], + tg->nr_queued[READ], tg->nr_queued[WRITE]); + + throtl_add_bio_tg(q->td, tg, bio); + *biop = NULL; + + if (update_disptime) { + tg_update_disptime(td, tg); + throtl_schedule_next_dispatch(td); + } + +out: + spin_unlock_irq(q->queue_lock); + return 0; +} + +int blk_throtl_init(struct request_queue *q) +{ + struct throtl_data *td; + struct throtl_grp *tg; + + td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node); + if (!td) + return -ENOMEM; + + INIT_HLIST_HEAD(&td->tg_list); + td->tg_service_tree = THROTL_RB_ROOT; + + /* Init root group */ + tg = &td->root_tg; + INIT_HLIST_NODE(&tg->tg_node); + RB_CLEAR_NODE(&tg->rb_node); + bio_list_init(&tg->bio_lists[0]); + bio_list_init(&tg->bio_lists[1]); + + /* Practically unlimited BW */ + tg->bps[0] = tg->bps[1] = -1; + tg->iops[0] = tg->iops[1] = -1; + atomic_set(&tg->ref, 1); + + INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work); + + rcu_read_lock(); + blkiocg_add_blkio_group(&blkio_root_cgroup, &tg->blkg, (void *)td, + 0, BLKIO_POLICY_THROTL); + rcu_read_unlock(); + + /* Attach throtl data to request queue */ + td->queue = q; + q->td = td; + return 0; +} + +void blk_throtl_exit(struct request_queue *q) +{ + struct throtl_data *td = q->td; + bool wait = false; + + BUG_ON(!td); + + throtl_shutdown_timer_wq(q); + + spin_lock_irq(q->queue_lock); + throtl_release_tgs(td); + blkiocg_del_blkio_group(&td->root_tg.blkg); + + /* If there are other groups */ + if (td->nr_undestroyed_grps >= 1) + wait = true; + + spin_unlock_irq(q->queue_lock); + + /* + * Wait for tg->blkg->key accessors to exit their grace periods. + * Do this wait only if there are other undestroyed groups out + * there (other than root group). This can happen if cgroup deletion + * path claimed the responsibility of cleaning up a group before + * queue cleanup code get to the group. + * + * Do not call synchronize_rcu() unconditionally as there are drivers + * which create/delete request queue hundreds of times during scan/boot + * and synchronize_rcu() can take significant time and slow down boot. 
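Back on the submission side, blk_throtl_bio() above relies on a marker bit so the same bio is not throttled twice: bios released by the dispatch worker re-enter generic_make_request() with REQ_THROTTLED set and pass straight through. A reduced model of that round trip (the flag value is an illustrative stand-in):

#include <stdio.h>

#define XREQ_THROTTLED (1 << 0)	/* illustrative stand-in */

/* reduced shape of blk_throtl_bio(): returns 1 if the bio is held */
static int throttle_hook(unsigned *bi_rw, int over_limit)
{
	if (*bi_rw & XREQ_THROTTLED) {
		*bi_rw &= ~XREQ_THROTTLED;	/* clear and pass through */
		return 0;
	}
	return over_limit;	/* 1: hold the bio for the worker */
}

int main(void)
{
	unsigned rw = 0;

	printf("first pass: queued=%d\n", throttle_hook(&rw, 1));
	/* the dispatch worker later marks it and resubmits it */
	rw |= XREQ_THROTTLED;
	printf("resubmit:   queued=%d\n", throttle_hook(&rw, 0));
	return 0;
}

This is also why tg_dispatch_one_bio() sets REQ_THROTTLED before handing bios back to generic_make_request() in throtl_dispatch().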
+ */ + if (wait) + synchronize_rcu(); + throtl_td_free(td); +} + +static int __init throtl_init(void) +{ + blkio_policy_register(&blkio_policy_throtl); + return 0; +} + +module_init(throtl_init); diff --git a/block/blk.h b/block/blk.h index 6e7dc87..2db8f32 100644 --- a/block/blk.h +++ b/block/blk.h @@ -51,6 +51,8 @@ static inline void blk_clear_rq_complete(struct request *rq) */ #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) +struct request *blk_do_flush(struct request_queue *q, struct request *rq); + static inline struct request *__elv_next_request(struct request_queue *q) { struct request *rq; @@ -58,7 +60,11 @@ static inline struct request *__elv_next_request(struct request_queue *q) while (1) { while (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); - if (blk_do_ordered(q, &rq)) + if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) || + rq == &q->flush_rq) + return rq; + rq = blk_do_flush(q, rq); + if (rq) return rq; } @@ -132,24 +138,20 @@ static inline int queue_congestion_off_threshold(struct request_queue *q) return q->nr_congestion_off; } -#if defined(CONFIG_BLK_DEV_INTEGRITY) - -#define rq_for_each_integrity_segment(bvl, _rq, _iter) \ - __rq_for_each_bio(_iter.bio, _rq) \ - bip_for_each_vec(bvl, _iter.bio->bi_integrity, _iter.i) - -#endif /* BLK_DEV_INTEGRITY */ - static inline int blk_cpu_to_group(int cpu) { + int group = NR_CPUS; #ifdef CONFIG_SCHED_MC const struct cpumask *mask = cpu_coregroup_mask(cpu); - return cpumask_first(mask); + group = cpumask_first(mask); #elif defined(CONFIG_SCHED_SMT) - return cpumask_first(topology_thread_cpumask(cpu)); + group = cpumask_first(topology_thread_cpumask(cpu)); #else return cpu; #endif + if (likely(group < NR_CPUS)) + return group; + return cpu; } /* diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index eb4086f..95954d3 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -30,6 +30,7 @@ static const int cfq_slice_sync = HZ / 10; static int cfq_slice_async = HZ / 25; static const int cfq_slice_async_rq = 2; static int cfq_slice_idle = HZ / 125; +static int cfq_group_idle = HZ / 125; static const int cfq_target_latency = HZ * 3/10; /* 300 ms */ static const int cfq_hist_divisor = 4; @@ -147,6 +148,8 @@ struct cfq_queue { struct cfq_queue *new_cfqq; struct cfq_group *cfqg; struct cfq_group *orig_cfqg; + /* Number of sectors dispatched from queue in single dispatch round */ + unsigned long nr_sectors; }; /* @@ -198,6 +201,8 @@ struct cfq_group { struct hlist_node cfqd_node; atomic_t ref; #endif + /* number of requests that are on the dispatch list or inside driver */ + int dispatched; }; /* @@ -216,7 +221,6 @@ struct cfq_data { enum wl_type_t serving_type; unsigned long workload_expires; struct cfq_group *serving_group; - bool noidle_tree_requires_idle; /* * Each priority tree is sorted by next_request position. These @@ -271,6 +275,7 @@ struct cfq_data { unsigned int cfq_slice[2]; unsigned int cfq_slice_async_rq; unsigned int cfq_slice_idle; + unsigned int cfq_group_idle; unsigned int cfq_latency; unsigned int cfq_group_isolation; @@ -378,6 +383,21 @@ CFQ_CFQQ_FNS(wait_busy); &cfqg->service_trees[i][j]: NULL) \ +static inline bool iops_mode(struct cfq_data *cfqd) +{ + /* + * If we are not idling on queues and it is a NCQ drive, parallel + * execution of requests is on and measuring time is not possible + * in most of the cases until and unless we drive shallower queue + * depths and that becomes a performance bottleneck. In such cases + * switch to start providing fairness in terms of number of IOs. 
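So CFQ group accounting effectively has two currencies: time, when idling keeps slice lengths measurable, and IOs, when an NCQ drive with idling disabled makes elapsed time meaningless. A reduced form of the charge selection that cfq_group_served() applies below (illustrative values, not kernel code):

#include <stdio.h>
#include <stdbool.h>

int main(void)
{
	bool iops_mode = true;		/* !cfq_slice_idle && hw_tag */
	bool sync = false;
	int nr_sync = 0;
	unsigned slice_used = 25, slice_dispatch = 12, allocated = 40;

	/* mirrors the charge selection in cfq_group_served() */
	unsigned charge = slice_used;
	if (iops_mode)
		charge = slice_dispatch;	/* charge IOs, not time */
	else if (!sync && !nr_sync)
		charge = allocated;		/* async-only group */

	printf("vdisktime advances by scaled %u\n", charge);
	return 0;
}

Whichever currency is chosen, it is scaled by the group's weight before being added to vdisktime, so heavier-weighted groups fall behind more slowly and get served more often.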
+ */ + if (!cfqd->cfq_slice_idle && cfqd->hw_tag) + return true; + else + return false; +} + static inline enum wl_prio_t cfqq_prio(struct cfq_queue *cfqq) { if (cfq_class_idle(cfqq)) @@ -906,7 +926,6 @@ static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq) slice_used = cfqq->allocated_slice; } - cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u", slice_used); return slice_used; } @@ -914,19 +933,21 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, struct cfq_queue *cfqq) { struct cfq_rb_root *st = &cfqd->grp_service_tree; - unsigned int used_sl, charge_sl; + unsigned int used_sl, charge; int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg) - cfqg->service_tree_idle.count; BUG_ON(nr_sync < 0); - used_sl = charge_sl = cfq_cfqq_slice_usage(cfqq); + used_sl = charge = cfq_cfqq_slice_usage(cfqq); - if (!cfq_cfqq_sync(cfqq) && !nr_sync) - charge_sl = cfqq->allocated_slice; + if (iops_mode(cfqd)) + charge = cfqq->slice_dispatch; + else if (!cfq_cfqq_sync(cfqq) && !nr_sync) + charge = cfqq->allocated_slice; /* Can't update vdisktime while group is on service tree */ cfq_rb_erase(&cfqg->rb_node, st); - cfqg->vdisktime += cfq_scale_slice(charge_sl, cfqg); + cfqg->vdisktime += cfq_scale_slice(charge, cfqg); __cfq_group_service_tree_add(st, cfqg); /* This group is being expired. Save the context */ @@ -940,6 +961,9 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, st->min_vdisktime); + cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u disp=%u charge=%u iops=%u" + " sect=%u", used_sl, cfqq->slice_dispatch, charge, + iops_mode(cfqd), cfqq->nr_sectors); cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl); cfq_blkiocg_set_start_empty_time(&cfqg->blkg); } @@ -994,10 +1018,20 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create) */ atomic_set(&cfqg->ref, 1); - /* Add group onto cgroup list */ - sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); - cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd, + /* + * Add group onto cgroup list. It might happen that bdi->dev is + * not initialized yet. Initialize this new group without major + * and minor info and this info will be filled in once a new thread + * comes for IO. See code above. + */ + if (bdi->dev) { + sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); + cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd, MKDEV(major, minor)); + } else + cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd, + 0); + cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev); /* Add group on cfqd list */ @@ -1587,6 +1621,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, cfqq->allocated_slice = 0; cfqq->slice_end = 0; cfqq->slice_dispatch = 0; + cfqq->nr_sectors = 0; cfq_clear_cfqq_wait_request(cfqq); cfq_clear_cfqq_must_dispatch(cfqq); @@ -1839,6 +1874,9 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq) BUG_ON(!service_tree); BUG_ON(!service_tree->count); + if (!cfqd->cfq_slice_idle) + return false; + /* We never do for idle class queues. */ if (prio == IDLE_WORKLOAD) return false; @@ -1863,7 +1901,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) { struct cfq_queue *cfqq = cfqd->active_queue; struct cfq_io_context *cic; - unsigned long sl; + unsigned long sl, group_idle = 0; /* * SSD device without seek penalty, disable idling.
But only do so @@ -1879,8 +1917,13 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) /* * idle is disabled, either manually or by past process history */ - if (!cfqd->cfq_slice_idle || !cfq_should_idle(cfqd, cfqq)) - return; + if (!cfq_should_idle(cfqd, cfqq)) { + /* no queue idling. Check for group idling */ + if (cfqd->cfq_group_idle) + group_idle = cfqd->cfq_group_idle; + else + return; + } /* * still active requests from this queue, don't idle @@ -1907,13 +1950,21 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) return; } + /* There are other queues in the group, don't do group idle */ + if (group_idle && cfqq->cfqg->nr_cfqq > 1) + return; + cfq_mark_cfqq_wait_request(cfqq); - sl = cfqd->cfq_slice_idle; + if (group_idle) + sl = cfqd->cfq_group_idle; + else + sl = cfqd->cfq_slice_idle; mod_timer(&cfqd->idle_slice_timer, jiffies + sl); cfq_blkiocg_update_set_idle_time_stats(&cfqq->cfqg->blkg); - cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl); + cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu group_idle: %d", sl, + group_idle ? 1 : 0); } /* @@ -1929,9 +1980,11 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) cfqq->next_rq = cfq_find_next_rq(cfqd, cfqq, rq); cfq_remove_request(rq); cfqq->dispatched++; + (RQ_CFQG(rq))->dispatched++; elv_dispatch_sort(q, rq); cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++; + cfqq->nr_sectors += blk_rq_sectors(rq); cfq_blkiocg_update_dispatch_stats(&cfqq->cfqg->blkg, blk_rq_bytes(rq), rq_data_dir(rq), rq_is_sync(rq)); } @@ -2126,7 +2179,6 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) slice = max_t(unsigned, slice, CFQ_MIN_TT); cfq_log(cfqd, "workload slice:%d", slice); cfqd->workload_expires = jiffies + slice; - cfqd->noidle_tree_requires_idle = false; } static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) @@ -2198,7 +2250,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) cfqq = NULL; goto keep_queue; } else - goto expire; + goto check_group_idle; } /* @@ -2226,8 +2278,23 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) * flight or is idling for a new request, allow either of these * conditions to happen (or time out) before selecting a new queue. */ - if (timer_pending(&cfqd->idle_slice_timer) || - (cfqq->dispatched && cfq_should_idle(cfqd, cfqq))) { + if (timer_pending(&cfqd->idle_slice_timer)) { + cfqq = NULL; + goto keep_queue; + } + + if (cfqq->dispatched && cfq_should_idle(cfqd, cfqq)) { + cfqq = NULL; + goto keep_queue; + } + + /* + * If group idle is enabled and there are requests dispatched from + * this group, wait for requests to complete. 
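A reduced form of the resulting idle-timer choice in cfq_arm_slice_timer(): queue idling wins when enabled, otherwise group idling keeps the group on the service tree so it does not lose its share the moment its last queue goes empty. Illustrative values, assuming HZ=1000:

#include <stdio.h>

int main(void)
{
	/* defaults from this patch: both HZ / 125 when enabled */
	unsigned long slice_idle = 0;	/* disabled, e.g. fast SSD */
	unsigned long group_idle = 8;

	/* queue idle if enabled, otherwise fall back to group idle */
	unsigned long sl = slice_idle ? slice_idle : group_idle;

	printf("arm idle timer for %lu jiffies (%s idle)\n",
	       sl, slice_idle ? "queue" : "group");
	return 0;
}

The extra guard in the code below (skip group idle when cfqg->nr_cfqq > 1) reflects that a group with other busy queues will keep its service-tree slot anyway, so waiting would only waste disk time.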
+ */ +check_group_idle: + if (cfqd->cfq_group_idle && cfqq->cfqg->nr_cfqq == 1 + && cfqq->cfqg->dispatched) { cfqq = NULL; goto keep_queue; } @@ -3108,7 +3175,9 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, if (cfqq->queued[0] + cfqq->queued[1] >= 4) cfq_mark_cfqq_deep(cfqq); - if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || + if (cfqq->next_rq && (cfqq->next_rq->cmd_flags & REQ_NOIDLE)) + enable_idle = 0; + else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq))) enable_idle = 0; else if (sample_valid(cic->ttime_samples)) { @@ -3375,6 +3444,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) WARN_ON(!cfqq->dispatched); cfqd->rq_in_driver--; cfqq->dispatched--; + (RQ_CFQG(rq))->dispatched--; cfq_blkiocg_update_completion_stats(&cfqq->cfqg->blkg, rq_start_time_ns(rq), rq_io_start_time_ns(rq), rq_data_dir(rq), rq_is_sync(rq)); @@ -3404,7 +3474,10 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) * the queue. */ if (cfq_should_wait_busy(cfqd, cfqq)) { - cfqq->slice_end = jiffies + cfqd->cfq_slice_idle; + unsigned long extend_sl = cfqd->cfq_slice_idle; + if (!cfqd->cfq_slice_idle) + extend_sl = cfqd->cfq_group_idle; + cfqq->slice_end = jiffies + extend_sl; cfq_mark_cfqq_wait_busy(cfqq); cfq_log_cfqq(cfqd, cfqq, "will busy wait"); } @@ -3421,17 +3494,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfq_slice_expired(cfqd, 1); else if (sync && cfqq_empty && !cfq_close_cooperator(cfqd, cfqq)) { - cfqd->noidle_tree_requires_idle |= - !(rq->cmd_flags & REQ_NOIDLE); - /* - * Idling is enabled for SYNC_WORKLOAD. - * SYNC_NOIDLE_WORKLOAD idles at the end of the tree - * only if we processed at least one !REQ_NOIDLE request - */ - if (cfqd->serving_type == SYNC_WORKLOAD - || cfqd->noidle_tree_requires_idle - || cfqq->cfqg->nr_cfqq == 1) - cfq_arm_slice_timer(cfqd); + cfq_arm_slice_timer(cfqd); } } @@ -3850,6 +3913,7 @@ static void *cfq_init_queue(struct request_queue *q) cfqd->cfq_slice[1] = cfq_slice_sync; cfqd->cfq_slice_async_rq = cfq_slice_async_rq; cfqd->cfq_slice_idle = cfq_slice_idle; + cfqd->cfq_group_idle = cfq_group_idle; cfqd->cfq_latency = 1; cfqd->cfq_group_isolation = 0; cfqd->hw_tag = -1; @@ -3922,6 +3986,7 @@ SHOW_FUNCTION(cfq_fifo_expire_async_show, cfqd->cfq_fifo_expire[0], 1); SHOW_FUNCTION(cfq_back_seek_max_show, cfqd->cfq_back_max, 0); SHOW_FUNCTION(cfq_back_seek_penalty_show, cfqd->cfq_back_penalty, 0); SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1); +SHOW_FUNCTION(cfq_group_idle_show, cfqd->cfq_group_idle, 1); SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); @@ -3954,6 +4019,7 @@ STORE_FUNCTION(cfq_back_seek_max_store, &cfqd->cfq_back_max, 0, UINT_MAX, 0); STORE_FUNCTION(cfq_back_seek_penalty_store, &cfqd->cfq_back_penalty, 1, UINT_MAX, 0); STORE_FUNCTION(cfq_slice_idle_store, &cfqd->cfq_slice_idle, 0, UINT_MAX, 1); +STORE_FUNCTION(cfq_group_idle_store, &cfqd->cfq_group_idle, 0, UINT_MAX, 1); STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1); STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, @@ -3975,6 +4041,7 @@ static struct elv_fs_entry cfq_attrs[] = { CFQ_ATTR(slice_async), CFQ_ATTR(slice_async_rq), 
CFQ_ATTR(slice_idle), + CFQ_ATTR(group_idle), CFQ_ATTR(low_latency), CFQ_ATTR(group_isolation), __ATTR_NULL @@ -4013,6 +4080,7 @@ static struct blkio_policy_type blkio_policy_cfq = { .blkio_unlink_group_fn = cfq_unlink_blkio_group, .blkio_update_group_weight_fn = cfq_update_blkio_group_weight, }, + .plid = BLKIO_POLICY_PROP, }; #else static struct blkio_policy_type blkio_policy_cfq; @@ -4028,6 +4096,12 @@ static int __init cfq_init(void) if (!cfq_slice_idle) cfq_slice_idle = 1; +#ifdef CONFIG_CFQ_GROUP_IOSCHED + if (!cfq_group_idle) + cfq_group_idle = 1; +#else + cfq_group_idle = 0; +#endif if (cfq_slab_setup()) return -ENOMEM; diff --git a/block/cfq.h b/block/cfq.h index 93448e5..54a6d90 100644 --- a/block/cfq.h +++ b/block/cfq.h @@ -69,7 +69,7 @@ static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg, static inline void cfq_blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, struct blkio_group *blkg, void *key, dev_t dev) { - blkiocg_add_blkio_group(blkcg, blkg, key, dev); + blkiocg_add_blkio_group(blkcg, blkg, key, dev, BLKIO_POLICY_PROP); } static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg) diff --git a/block/elevator.c b/block/elevator.c index ec585c9..c3ffd86 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -617,8 +617,6 @@ void elv_quiesce_end(struct request_queue *q) void elv_insert(struct request_queue *q, struct request *rq, int where) { - struct list_head *pos; - unsigned ordseq; int unplug_it = 1; trace_block_rq_insert(q, rq); @@ -626,9 +624,16 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) rq->q = q; switch (where) { + case ELEVATOR_INSERT_REQUEUE: + /* + * Most requeues happen because of a busy condition, + * don't force unplug of the queue for that case. + * Clear unplug_it and fall through. + */ + unplug_it = 0; + case ELEVATOR_INSERT_FRONT: rq->cmd_flags |= REQ_SOFTBARRIER; - list_add(&rq->queuelist, &q->queue_head); break; @@ -668,36 +673,6 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) q->elevator->ops->elevator_add_req_fn(q, rq); break; - case ELEVATOR_INSERT_REQUEUE: - /* - * If ordered flush isn't in progress, we do front - * insertion; otherwise, requests should be requeued - * in ordseq order. - */ - rq->cmd_flags |= REQ_SOFTBARRIER; - - /* - * Most requeues happen because of a busy condition, - * don't force unplug of the queue for that case. 
- */ - unplug_it = 0; - - if (q->ordseq == 0) { - list_add(&rq->queuelist, &q->queue_head); - break; - } - - ordseq = blk_ordered_req_seq(rq); - - list_for_each(pos, &q->queue_head) { - struct request *pos_rq = list_entry_rq(pos); - if (ordseq <= blk_ordered_req_seq(pos_rq)) - break; - } - - list_add_tail(&rq->queuelist, pos); - break; - default: printk(KERN_ERR "%s: bad insertion point %d\n", __func__, where); @@ -716,26 +691,8 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) void __elv_add_request(struct request_queue *q, struct request *rq, int where, int plug) { - if (q->ordcolor) - rq->cmd_flags |= REQ_ORDERED_COLOR; - if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { - /* - * toggle ordered color - */ - if (rq->cmd_flags & REQ_HARDBARRIER) - q->ordcolor ^= 1; - - /* - * barriers implicitly indicate back insertion - */ - if (where == ELEVATOR_INSERT_SORT) - where = ELEVATOR_INSERT_BACK; - - /* - * this request is scheduling boundary, update - * end_sector - */ + /* barriers are scheduling boundary, update end_sector */ if (rq->cmd_type == REQ_TYPE_FS || (rq->cmd_flags & REQ_DISCARD)) { q->end_sector = rq_end_sector(rq); @@ -855,24 +812,6 @@ void elv_completed_request(struct request_queue *q, struct request *rq) e->ops->elevator_completed_req_fn) e->ops->elevator_completed_req_fn(q, rq); } - - /* - * Check if the queue is waiting for fs requests to be - * drained for flush sequence. - */ - if (unlikely(q->ordseq)) { - struct request *next = NULL; - - if (!list_empty(&q->queue_head)) - next = list_entry_rq(q->queue_head.next); - - if (!queue_in_flight(q) && - blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && - (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) { - blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); - __blk_run_queue(q); - } - } } #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr) @@ -1009,18 +948,19 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) { struct elevator_queue *old_elevator, *e; void *data; + int err; /* * Allocate new elevator */ e = elevator_alloc(q, new_e); if (!e) - return 0; + return -ENOMEM; data = elevator_init_queue(q, e); if (!data) { kobject_put(&e->kobj); - return 0; + return -ENOMEM; } /* @@ -1043,7 +983,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) __elv_unregister_queue(old_elevator); - if (elv_register_queue(q)) + err = elv_register_queue(q); + if (err) goto fail_register; /* @@ -1056,7 +997,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name); - return 1; + return 0; fail_register: /* @@ -1071,17 +1012,19 @@ fail_register: queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); spin_unlock_irq(q->queue_lock); - return 0; + return err; } -ssize_t elv_iosched_store(struct request_queue *q, const char *name, - size_t count) +/* + * Switch this queue to the given IO scheduler. 
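elevator_change() factors the actual scheduler switch out of the sysfs store path so that in-kernel callers can select an I/O scheduler directly and get a real error code back. A minimal usage sketch, assuming a driver that wants "noop" for its queue (the function name and scheduler choice are illustrative):

    #include <linux/blkdev.h>
    #include <linux/elevator.h>

    static int example_pick_noop(struct request_queue *q)
    {
            int err = elevator_change(q, "noop");

            if (err)
                    printk(KERN_WARNING "noop switch failed: %d\n", err);
            return err;     /* 0 on success, -errno otherwise */
    }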
+ */ +int elevator_change(struct request_queue *q, const char *name) { char elevator_name[ELV_NAME_MAX]; struct elevator_type *e; if (!q->elevator) - return count; + return -ENXIO; strlcpy(elevator_name, name, sizeof(elevator_name)); e = elevator_get(strstrip(elevator_name)); @@ -1092,13 +1035,27 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name, if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) { elevator_put(e); - return count; + return 0; } - if (!elevator_switch(q, e)) - printk(KERN_ERR "elevator: switch to %s failed\n", - elevator_name); - return count; + return elevator_switch(q, e); +} +EXPORT_SYMBOL(elevator_change); + +ssize_t elv_iosched_store(struct request_queue *q, const char *name, + size_t count) +{ + int ret; + + if (!q->elevator) + return count; + + ret = elevator_change(q, name); + if (!ret) + return count; + + printk(KERN_ERR "elevator: switch to %s failed\n", name); + return ret; } ssize_t elv_iosched_show(struct request_queue *q, char *name) diff --git a/block/genhd.c b/block/genhd.c index 59a2db6..7923e72 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -541,13 +541,15 @@ void add_disk(struct gendisk *disk) disk->major = MAJOR(devt); disk->first_minor = MINOR(devt); + /* Register BDI before referencing it from bdev */ + bdi = &disk->queue->backing_dev_info; + bdi_register_dev(bdi, disk_devt(disk)); + blk_register_region(disk_devt(disk), disk->minors, NULL, exact_match, exact_lock, disk); register_disk(disk); blk_register_queue(disk); - bdi = &disk->queue->backing_dev_info; - bdi_register_dev(bdi, disk_devt(disk)); retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, "bdi"); WARN_ON(retval); @@ -642,6 +644,7 @@ void __init printk_all_partitions(void) struct hd_struct *part; char name_buf[BDEVNAME_SIZE]; char devt_buf[BDEVT_SIZE]; + u8 uuid[PARTITION_META_INFO_UUIDLTH * 2 + 1]; /* * Don't show empty devices or things that have been @@ -660,10 +663,14 @@ void __init printk_all_partitions(void) while ((part = disk_part_iter_next(&piter))) { bool is_part0 = part == &disk->part0; - printk("%s%s %10llu %s", is_part0 ? "" : " ", + uuid[0] = 0; + if (part->info) + part_unpack_uuid(part->info->uuid, uuid); + + printk("%s%s %10llu %s %s", is_part0 ? "" : " ", bdevt_str(part_devt(part), devt_buf), (unsigned long long)part->nr_sects >> 1, - disk_name(disk, part->partno, name_buf)); + disk_name(disk, part->partno, name_buf), uuid); if (is_part0) { if (disk->driverfs_dev != NULL && disk->driverfs_dev->driver != NULL) @@ -1004,6 +1011,7 @@ static void disk_release(struct device *dev) kfree(disk->random); disk_replace_part_tbl(disk, NULL); free_part_stats(&disk->part0); + free_part_info(&disk->part0); kfree(disk); } struct class block_class = { diff --git a/block/ioctl.c b/block/ioctl.c index d8052f0..d724ceb 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -62,7 +62,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user /* all seems OK */ part = add_partition(disk, partno, start, length, - ADDPART_FLAG_NONE); + ADDPART_FLAG_NONE, NULL); mutex_unlock(&bdev->bd_mutex); return IS_ERR(part) ? 
PTR_ERR(part) : 0; case BLKPG_DEL_PARTITION: @@ -116,7 +116,7 @@ static int blkdev_reread_part(struct block_device *bdev) static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, uint64_t len, int secure) { - unsigned long flags = BLKDEV_IFL_WAIT; + unsigned long flags = 0; if (start & 511) return -EINVAL; @@ -128,7 +128,7 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, if (start + len > (bdev->bd_inode->i_size >> 9)) return -EINVAL; if (secure) - flags |= BLKDEV_IFL_SECURE; + flags |= BLKDEV_DISCARD_SECURE; return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags); } diff --git a/crypto/Kconfig b/crypto/Kconfig index 1cd497d..e573077 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -101,13 +101,13 @@ config CRYPTO_MANAGER2 select CRYPTO_BLKCIPHER2 select CRYPTO_PCOMP2 -config CRYPTO_MANAGER_TESTS - bool "Run algolithms' self-tests" +config CRYPTO_MANAGER_DISABLE_TESTS + bool "Disable run-time self tests" default y depends on CRYPTO_MANAGER2 help - Run cryptomanager's tests for the new crypto algorithms being - registered. + Disable run-time self tests that normally take place at + algorithm registration. config CRYPTO_GF128MUL tristate "GF(2^128) multiplication functions (EXPERIMENTAL)" diff --git a/crypto/ahash.c b/crypto/ahash.c index b8c59b8..f669822 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -47,8 +47,11 @@ static int hash_walk_next(struct crypto_hash_walk *walk) walk->data = crypto_kmap(walk->pg, 0); walk->data += offset; - if (offset & alignmask) - nbytes = alignmask + 1 - (offset & alignmask); + if (offset & alignmask) { + unsigned int unaligned = alignmask + 1 - (offset & alignmask); + if (nbytes > unaligned) + nbytes = unaligned; + } walk->entrylen -= nbytes; return nbytes; diff --git a/crypto/algboss.c b/crypto/algboss.c index 40bd391..791d194 100644 --- a/crypto/algboss.c +++ b/crypto/algboss.c @@ -206,13 +206,16 @@ err: return NOTIFY_OK; } -#ifdef CONFIG_CRYPTO_MANAGER_TESTS static int cryptomgr_test(void *data) { struct crypto_test_param *param = data; u32 type = param->type; int err = 0; +#ifdef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS + goto skiptest; +#endif + if (type & CRYPTO_ALG_TESTED) goto skiptest; @@ -267,7 +270,6 @@ err_put_module: err: return NOTIFY_OK; } -#endif /* CONFIG_CRYPTO_MANAGER_TESTS */ static int cryptomgr_notify(struct notifier_block *this, unsigned long msg, void *data) @@ -275,10 +277,8 @@ static int cryptomgr_notify(struct notifier_block *this, unsigned long msg, switch (msg) { case CRYPTO_MSG_ALG_REQUEST: return cryptomgr_schedule_probe(data); -#ifdef CONFIG_CRYPTO_MANAGER_TESTS case CRYPTO_MSG_ALG_REGISTER: return cryptomgr_schedule_test(data); -#endif } return NOTIFY_DONE; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index abd980c..fa8c8f7 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -23,7 +23,7 @@ #include "internal.h" -#ifndef CONFIG_CRYPTO_MANAGER_TESTS +#ifdef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS /* a perfect nop */ int alg_test(const char *driver, const char *alg, u32 type, u32 mask) @@ -2542,6 +2542,6 @@ non_fips_alg: return -EINVAL; } -#endif /* CONFIG_CRYPTO_MANAGER_TESTS */ +#endif /* CONFIG_CRYPTO_MANAGER_DISABLE_TESTS */ EXPORT_SYMBOL_GPL(alg_test); diff --git a/drivers/Makefile b/drivers/Makefile index ae47344..a2aea53 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -50,7 +50,7 @@ obj-$(CONFIG_SPI) += spi/ obj-y += net/ obj-$(CONFIG_ATM) += atm/ obj-$(CONFIG_FUSION) += message/ -obj-$(CONFIG_FIREWIRE) += firewire/ +obj-y += firewire/ obj-y += ieee1394/ 
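Worth a note on the crypto/ahash.c hunk above: the old code set the walk step to the distance to the next alignment boundary even when fewer bytes than that remained in the entry, overrunning it. The corrected clamping, pulled into a hypothetical helper with a worked example (illustrative only):

    /* e.g. alignmask = 3, offset = 5, nbytes = 2:
     * unaligned = 4 - (5 & 3) = 3; 2 <= 3, so nbytes stays 2
     * (the old code unconditionally bumped it to 3)
     */
    static unsigned int walk_step(unsigned int nbytes, unsigned int offset,
                                  unsigned int alignmask)
    {
            if (offset & alignmask) {
                    unsigned int unaligned = alignmask + 1 -
                                             (offset & alignmask);

                    if (nbytes > unaligned)
                            nbytes = unaligned;
            }
            return nbytes;
    }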
obj-$(CONFIG_UIO) += uio/ obj-y += cdrom/ diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 1f67057..3ba8d1f 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -226,22 +225,31 @@ static acpi_status acpi_pci_run_osc(acpi_handle handle, return status; } -static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, u32 flags) +static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, + u32 support, + u32 *control) { acpi_status status; - u32 support_set, result, capbuf[3]; + u32 result, capbuf[3]; + + support &= OSC_PCI_SUPPORT_MASKS; + support |= root->osc_support_set; - /* do _OSC query for all possible controls */ - support_set = root->osc_support_set | (flags & OSC_PCI_SUPPORT_MASKS); capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE; - capbuf[OSC_SUPPORT_TYPE] = support_set; - capbuf[OSC_CONTROL_TYPE] = OSC_PCI_CONTROL_MASKS; + capbuf[OSC_SUPPORT_TYPE] = support; + if (control) { + *control &= OSC_PCI_CONTROL_MASKS; + capbuf[OSC_CONTROL_TYPE] = *control | root->osc_control_set; + } else { + /* Run _OSC query for all possible controls. */ + capbuf[OSC_CONTROL_TYPE] = OSC_PCI_CONTROL_MASKS; + } status = acpi_pci_run_osc(root->device->handle, capbuf, &result); if (ACPI_SUCCESS(status)) { - root->osc_support_set = support_set; - root->osc_control_qry = result; - root->osc_queried = 1; + root->osc_support_set = support; + if (control) + *control = result; } return status; } @@ -255,7 +263,7 @@ static acpi_status acpi_pci_osc_support(struct acpi_pci_root *root, u32 flags) if (ACPI_FAILURE(status)) return status; mutex_lock(&osc_lock); - status = acpi_pci_query_osc(root, flags); + status = acpi_pci_query_osc(root, flags, NULL); mutex_unlock(&osc_lock); return status; } @@ -365,55 +373,70 @@ out: EXPORT_SYMBOL_GPL(acpi_get_pci_dev); /** - * acpi_pci_osc_control_set - commit requested control to Firmware - * @handle: acpi_handle for the target ACPI object - * @flags: driver's requested control bits + * acpi_pci_osc_control_set - Request control of PCI root _OSC features. + * @handle: ACPI handle of a PCI root bridge (or PCIe Root Complex). + * @mask: Mask of _OSC bits to request control of, place to store control mask. + * @req: Mask of _OSC bits whose control is essential to the caller. + * + * Run _OSC query for @mask and if that is successful, compare the returned + * mask of control bits with @req. If all of the @req bits are set in the + * returned mask, run _OSC request for it. * - * Attempt to take control from Firmware on requested control bits. + * The variable at the @mask address may be modified regardless of whether or + * not the function returns success. On success it will contain the mask of + * _OSC bits the BIOS has granted control of, but its contents are meaningless + * on failure.
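Under the new calling convention a driver passes every control bit it would like in *mask and the subset it cannot do without in @req. A hedged caller sketch, modeled on a native PCIe hotplug user (the particular bits and the error mapping are illustrative):

    #include <linux/acpi.h>

    static int example_claim_osc(acpi_handle handle)
    {
            u32 flags = OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL |
                        OSC_PCI_EXPRESS_NATIVE_HP_CONTROL;
            acpi_status status;

            /* request both bits; only native hotplug control is essential */
            status = acpi_pci_osc_control_set(handle, &flags,
                            OSC_PCI_EXPRESS_NATIVE_HP_CONTROL);
            if (ACPI_FAILURE(status))
                    return -ENODEV;

            /* flags now holds every bit firmware granted control of */
            return 0;
    }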
**/ -acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags) +acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 req) { + struct acpi_pci_root *root; acpi_status status; - u32 control_req, result, capbuf[3]; + u32 ctrl, capbuf[3]; acpi_handle tmp; - struct acpi_pci_root *root; - status = acpi_get_handle(handle, "_OSC", &tmp); - if (ACPI_FAILURE(status)) - return status; + if (!mask) + return AE_BAD_PARAMETER; - control_req = (flags & OSC_PCI_CONTROL_MASKS); - if (!control_req) + ctrl = *mask & OSC_PCI_CONTROL_MASKS; + if ((ctrl & req) != req) return AE_TYPE; root = acpi_pci_find_root(handle); if (!root) return AE_NOT_EXIST; + status = acpi_get_handle(handle, "_OSC", &tmp); + if (ACPI_FAILURE(status)) + return status; + mutex_lock(&osc_lock); + + *mask = ctrl | root->osc_control_set; /* No need to evaluate _OSC if the control was already granted. */ - if ((root->osc_control_set & control_req) == control_req) + if ((root->osc_control_set & ctrl) == ctrl) goto out; - /* Need to query controls first before requesting them */ - if (!root->osc_queried) { - status = acpi_pci_query_osc(root, root->osc_support_set); + /* Need to check the available control bits before requesting them. */ + while (*mask) { + status = acpi_pci_query_osc(root, root->osc_support_set, mask); if (ACPI_FAILURE(status)) goto out; + if (ctrl == *mask) + break; + ctrl = *mask; } - if ((root->osc_control_qry & control_req) != control_req) { - printk(KERN_DEBUG - "Firmware did not grant requested _OSC control\n"); + + if ((ctrl & req) != req) { status = AE_SUPPORT; goto out; } capbuf[OSC_QUERY_TYPE] = 0; capbuf[OSC_SUPPORT_TYPE] = root->osc_support_set; - capbuf[OSC_CONTROL_TYPE] = root->osc_control_set | control_req; - status = acpi_pci_run_osc(handle, capbuf, &result); + capbuf[OSC_CONTROL_TYPE] = ctrl; + status = acpi_pci_run_osc(handle, capbuf, mask); if (ACPI_SUCCESS(status)) - root->osc_control_set = result; + root->osc_control_set = *mask; out: mutex_unlock(&osc_lock); return status; @@ -544,14 +567,6 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) if (flags != base_flags) acpi_pci_osc_support(root, flags); - status = acpi_pci_osc_control_set(root->device->handle, - OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); - - if (ACPI_FAILURE(status)) { - printk(KERN_INFO "Unable to assume PCIe control: Disabling ASPM\n"); - pcie_no_aspm(); - } - pci_acpi_add_bus_pm_notifier(device, root->bus); if (device->wakeup.flags.run_wake) device_set_run_wake(root->bus->bridge, true); diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile index d5df04a..ccd461b 100644 --- a/drivers/ata/Makefile +++ b/drivers/ata/Makefile @@ -99,7 +99,7 @@ obj-$(CONFIG_ATA_GENERIC) += ata_generic.o # Should be last libata driver obj-$(CONFIG_PATA_LEGACY) += pata_legacy.o -libata-objs := libata-core.o libata-scsi.o libata-eh.o +libata-objs := libata-core.o libata-scsi.o libata-eh.o libata-transport.o libata-$(CONFIG_ATA_SFF) += libata-sff.o libata-$(CONFIG_SATA_PMP) += libata-pmp.o libata-$(CONFIG_ATA_ACPI) += libata-acpi.o diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 013727b..7bc0412 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -253,6 +253,9 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x1c05), board_ahci }, /* CPT RAID */ { PCI_VDEVICE(INTEL, 0x1c06), board_ahci }, /* CPT RAID */ { PCI_VDEVICE(INTEL, 0x1c07), board_ahci }, /* CPT RAID */ + { PCI_VDEVICE(INTEL, 0x1d02), board_ahci }, /* PBG AHCI */ + { PCI_VDEVICE(INTEL, 0x1d04), board_ahci }, /*
PBG RAID */ + { PCI_VDEVICE(INTEL, 0x1d06), board_ahci }, /* PBG RAID */ /* JMicron 360/1/3/5/6, match class to avoid IDE function */ { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, @@ -1201,9 +1204,6 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) ata_port_pbar_desc(ap, AHCI_PCI_BAR, 0x100 + ap->port_no * 0x80, "port"); - /* set initial link pm policy */ - ap->pm_policy = NOT_AVAILABLE; - /* set enclosure management message type */ if (ap->flags & ATA_FLAG_EM) ap->em_message_type = hpriv->em_msg_type; diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 474427b..e1dfc3c 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -201,7 +201,6 @@ enum { AHCI_HFLAG_MV_PATA = (1 << 4), /* PATA port */ AHCI_HFLAG_NO_MSI = (1 << 5), /* no PCI MSI */ AHCI_HFLAG_NO_PMP = (1 << 6), /* no PMP */ - AHCI_HFLAG_NO_HOTPLUG = (1 << 7), /* ignore PxSERR.DIAG.N */ AHCI_HFLAG_SECT255 = (1 << 8), /* max 255 sectors */ AHCI_HFLAG_YES_NCQ = (1 << 9), /* force NCQ cap on */ AHCI_HFLAG_NO_SUSPEND = (1 << 10), /* don't suspend */ @@ -216,7 +215,7 @@ enum { AHCI_FLAG_COMMON = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | ATA_FLAG_MMIO | ATA_FLAG_PIO_DMA | ATA_FLAG_ACPI_SATA | ATA_FLAG_AN | - ATA_FLAG_IPM, + ATA_FLAG_LPM, ICH_MAP = 0x90, /* ICH MAP register */ diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index 4e97f33..d732b8f 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -125,9 +125,6 @@ static int __init ahci_probe(struct platform_device *pdev) ata_port_desc(ap, "mmio %pR", mem); ata_port_desc(ap, "port 0x%x", 0x100 + ap->port_no * 0x80); - /* set initial link pm policy */ - ap->pm_policy = NOT_AVAILABLE; - /* set enclosure management message type */ if (ap->flags & ATA_FLAG_EM) ap->em_message_type = hpriv->em_msg_type; diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index 3971bc0..2cfc1ae 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -158,7 +158,6 @@ struct piix_map_db { struct piix_host_priv { const int *map; u32 saved_iocfg; - spinlock_t sidpr_lock; /* FIXME: remove once locking in EH is fixed */ void __iomem *sidpr; }; @@ -175,6 +174,8 @@ static int piix_sidpr_scr_read(struct ata_link *link, unsigned int reg, u32 *val); static int piix_sidpr_scr_write(struct ata_link *link, unsigned int reg, u32 val); +static int piix_sidpr_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, + unsigned hints); static bool piix_irq_check(struct ata_port *ap); #ifdef CONFIG_PM static int piix_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg); @@ -302,6 +303,10 @@ static const struct pci_device_id piix_pci_tbl[] = { { 0x8086, 0x1c08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata }, /* SATA Controller IDE (CPT) */ { 0x8086, 0x1c09, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata }, + /* SATA Controller IDE (PBG) */ + { 0x8086, 0x1d00, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata }, + /* SATA Controller IDE (PBG) */ + { 0x8086, 0x1d08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata }, { } /* terminate list */ }; @@ -344,11 +349,22 @@ static struct ata_port_operations ich_pata_ops = { .set_dmamode = ich_set_dmamode, }; +static struct device_attribute *piix_sidpr_shost_attrs[] = { + &dev_attr_link_power_management_policy, + NULL +}; + +static struct scsi_host_template piix_sidpr_sht = { + ATA_BMDMA_SHT(DRV_NAME), + .shost_attrs = piix_sidpr_shost_attrs, +}; + static struct ata_port_operations piix_sidpr_sata_ops = { .inherits = &piix_sata_ops, .hardreset = sata_std_hardreset, .scr_read 
= piix_sidpr_scr_read, .scr_write = piix_sidpr_scr_write, + .set_lpm = piix_sidpr_set_lpm, }; static const struct piix_map_db ich5_map_db = { @@ -952,15 +968,12 @@ static int piix_sidpr_scr_read(struct ata_link *link, unsigned int reg, u32 *val) { struct piix_host_priv *hpriv = link->ap->host->private_data; - unsigned long flags; if (reg >= ARRAY_SIZE(piix_sidx_map)) return -EINVAL; - spin_lock_irqsave(&hpriv->sidpr_lock, flags); piix_sidpr_sel(link, reg); *val = ioread32(hpriv->sidpr + PIIX_SIDPR_DATA); - spin_unlock_irqrestore(&hpriv->sidpr_lock, flags); return 0; } @@ -968,18 +981,21 @@ static int piix_sidpr_scr_write(struct ata_link *link, unsigned int reg, u32 val) { struct piix_host_priv *hpriv = link->ap->host->private_data; - unsigned long flags; if (reg >= ARRAY_SIZE(piix_sidx_map)) return -EINVAL; - spin_lock_irqsave(&hpriv->sidpr_lock, flags); piix_sidpr_sel(link, reg); iowrite32(val, hpriv->sidpr + PIIX_SIDPR_DATA); - spin_unlock_irqrestore(&hpriv->sidpr_lock, flags); return 0; } +static int piix_sidpr_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, + unsigned hints) +{ + return sata_link_scr_lpm(link, policy, false); +} + static bool piix_irq_check(struct ata_port *ap) { if (unlikely(!ap->ioaddr.bmdma_addr)) @@ -1539,6 +1555,7 @@ static int __devinit piix_init_one(struct pci_dev *pdev, struct device *dev = &pdev->dev; struct ata_port_info port_info[2]; const struct ata_port_info *ppi[] = { &port_info[0], &port_info[1] }; + struct scsi_host_template *sht = &piix_sht; unsigned long port_flags; struct ata_host *host; struct piix_host_priv *hpriv; @@ -1573,7 +1590,6 @@ static int __devinit piix_init_one(struct pci_dev *pdev, hpriv = devm_kzalloc(dev, sizeof(*hpriv), GFP_KERNEL); if (!hpriv) return -ENOMEM; - spin_lock_init(&hpriv->sidpr_lock); /* Save IOCFG, this will be used for cable detection, quirk * detection and restoration on detach. 
This is necessary @@ -1608,6 +1624,8 @@ static int __devinit piix_init_one(struct pci_dev *pdev, rc = piix_init_sidpr(host); if (rc) return rc; + if (host->ports[0]->ops == &piix_sidpr_sata_ops) + sht = &piix_sidpr_sht; } /* apply IOCFG bit18 quirk */ @@ -1634,7 +1652,7 @@ static int __devinit piix_init_one(struct pci_dev *pdev, host->flags |= ATA_HOST_PARALLEL_SCAN; pci_set_master(pdev); - return ata_pci_sff_activate_host(host, ata_bmdma_interrupt, &piix_sht); + return ata_pci_sff_activate_host(host, ata_bmdma_interrupt, sht); } static void piix_remove_one(struct pci_dev *pdev) diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 666850d..e1307f4 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -56,9 +56,8 @@ MODULE_PARM_DESC(skip_host_reset, "skip global host reset (0=don't skip, 1=skip) module_param_named(ignore_sss, ahci_ignore_sss, int, 0444); MODULE_PARM_DESC(ignore_sss, "Ignore staggered spinup flag (0=don't ignore, 1=ignore)"); -static int ahci_enable_alpm(struct ata_port *ap, - enum link_pm policy); -static void ahci_disable_alpm(struct ata_port *ap); +static int ahci_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, + unsigned hints); static ssize_t ahci_led_show(struct ata_port *ap, char *buf); static ssize_t ahci_led_store(struct ata_port *ap, const char *buf, size_t size); @@ -172,8 +171,7 @@ struct ata_port_operations ahci_ops = { .pmp_attach = ahci_pmp_attach, .pmp_detach = ahci_pmp_detach, - .enable_pm = ahci_enable_alpm, - .disable_pm = ahci_disable_alpm, + .set_lpm = ahci_set_lpm, .em_show = ahci_led_show, .em_store = ahci_led_store, .sw_activity_show = ahci_activity_show, @@ -577,7 +575,7 @@ int ahci_stop_engine(struct ata_port *ap) writel(tmp, port_mmio + PORT_CMD); /* wait for engine to stop. 
This could be as long as 500 msec */ - tmp = ata_wait_register(port_mmio + PORT_CMD, + tmp = ata_wait_register(ap, port_mmio + PORT_CMD, PORT_CMD_LIST_ON, PORT_CMD_LIST_ON, 1, 500); if (tmp & PORT_CMD_LIST_ON) return -EIO; @@ -624,7 +622,7 @@ static int ahci_stop_fis_rx(struct ata_port *ap) writel(tmp, port_mmio + PORT_CMD); /* wait for completion, spec says 500ms, give it 1000 */ - tmp = ata_wait_register(port_mmio + PORT_CMD, PORT_CMD_FIS_ON, + tmp = ata_wait_register(ap, port_mmio + PORT_CMD, PORT_CMD_FIS_ON, PORT_CMD_FIS_ON, 10, 1000); if (tmp & PORT_CMD_FIS_ON) return -EBUSY; @@ -650,127 +648,56 @@ static void ahci_power_up(struct ata_port *ap) writel(cmd | PORT_CMD_ICC_ACTIVE, port_mmio + PORT_CMD); } -static void ahci_disable_alpm(struct ata_port *ap) +static int ahci_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, + unsigned int hints) { + struct ata_port *ap = link->ap; struct ahci_host_priv *hpriv = ap->host->private_data; - void __iomem *port_mmio = ahci_port_base(ap); - u32 cmd; struct ahci_port_priv *pp = ap->private_data; - - /* IPM bits should be disabled by libata-core */ - /* get the existing command bits */ - cmd = readl(port_mmio + PORT_CMD); - - /* disable ALPM and ASP */ - cmd &= ~PORT_CMD_ASP; - cmd &= ~PORT_CMD_ALPE; - - /* force the interface back to active */ - cmd |= PORT_CMD_ICC_ACTIVE; - - /* write out new cmd value */ - writel(cmd, port_mmio + PORT_CMD); - cmd = readl(port_mmio + PORT_CMD); - - /* wait 10ms to be sure we've come out of any low power state */ - msleep(10); - - /* clear out any PhyRdy stuff from interrupt status */ - writel(PORT_IRQ_PHYRDY, port_mmio + PORT_IRQ_STAT); - - /* go ahead and clean out PhyRdy Change from Serror too */ - ahci_scr_write(&ap->link, SCR_ERROR, ((1 << 16) | (1 << 18))); - - /* - * Clear flag to indicate that we should ignore all PhyRdy - * state changes - */ - hpriv->flags &= ~AHCI_HFLAG_NO_HOTPLUG; - - /* - * Enable interrupts on Phy Ready. - */ - pp->intr_mask |= PORT_IRQ_PHYRDY; - writel(pp->intr_mask, port_mmio + PORT_IRQ_MASK); - - /* - * don't change the link pm policy - we can be called - * just to turn of link pm temporarily - */ -} - -static int ahci_enable_alpm(struct ata_port *ap, - enum link_pm policy) -{ - struct ahci_host_priv *hpriv = ap->host->private_data; void __iomem *port_mmio = ahci_port_base(ap); - u32 cmd; - struct ahci_port_priv *pp = ap->private_data; - u32 asp; - - /* Make sure the host is capable of link power management */ - if (!(hpriv->cap & HOST_CAP_ALPM)) - return -EINVAL; - switch (policy) { - case MAX_PERFORMANCE: - case NOT_AVAILABLE: + if (policy != ATA_LPM_MAX_POWER) { /* - * if we came here with NOT_AVAILABLE, - * it just means this is the first time we - * have tried to enable - default to max performance, - * and let the user go to lower power modes on request. + * Disable interrupts on Phy Ready. This keeps us from + * getting woken up due to spurious phy ready + * interrupts. */ - ahci_disable_alpm(ap); - return 0; - case MIN_POWER: - /* configure HBA to enter SLUMBER */ - asp = PORT_CMD_ASP; - break; - case MEDIUM_POWER: - /* configure HBA to enter PARTIAL */ - asp = 0; - break; - default: - return -EINVAL; + pp->intr_mask &= ~PORT_IRQ_PHYRDY; + writel(pp->intr_mask, port_mmio + PORT_IRQ_MASK); + + sata_link_scr_lpm(link, policy, false); } - /* - * Disable interrupts on Phy Ready. This keeps us from - * getting woken up due to spurious phy ready interrupts - * TBD - Hot plug should be done via polling now, is - * that even supported? 
- */ - pp->intr_mask &= ~PORT_IRQ_PHYRDY; - writel(pp->intr_mask, port_mmio + PORT_IRQ_MASK); + if (hpriv->cap & HOST_CAP_ALPM) { + u32 cmd = readl(port_mmio + PORT_CMD); - /* - * Set a flag to indicate that we should ignore all PhyRdy - * state changes since these can happen now whenever we - * change link state - */ - hpriv->flags |= AHCI_HFLAG_NO_HOTPLUG; + if (policy == ATA_LPM_MAX_POWER || !(hints & ATA_LPM_HIPM)) { + cmd &= ~(PORT_CMD_ASP | PORT_CMD_ALPE); + cmd |= PORT_CMD_ICC_ACTIVE; - /* get the existing command bits */ - cmd = readl(port_mmio + PORT_CMD); + writel(cmd, port_mmio + PORT_CMD); + readl(port_mmio + PORT_CMD); - /* - * Set ASP based on Policy - */ - cmd |= asp; + /* wait 10ms to be sure we've come out of LPM state */ + ata_msleep(ap, 10); + } else { + cmd |= PORT_CMD_ALPE; + if (policy == ATA_LPM_MIN_POWER) + cmd |= PORT_CMD_ASP; - /* - * Setting this bit will instruct the HBA to aggressively - * enter a lower power link state when it's appropriate and - * based on the value set above for ASP - */ - cmd |= PORT_CMD_ALPE; + /* write out new cmd value */ + writel(cmd, port_mmio + PORT_CMD); + } + } - /* write out new cmd value */ - writel(cmd, port_mmio + PORT_CMD); - cmd = readl(port_mmio + PORT_CMD); + if (policy == ATA_LPM_MAX_POWER) { + sata_link_scr_lpm(link, policy, false); + + /* turn PHYRDY IRQ back on */ + pp->intr_mask |= PORT_IRQ_PHYRDY; + writel(pp->intr_mask, port_mmio + PORT_IRQ_MASK); + } - /* IPM bits should be set by libata-core */ return 0; } @@ -821,7 +748,7 @@ static void ahci_start_port(struct ata_port *ap) emp->led_state, 4); if (rc == -EBUSY) - msleep(1); + ata_msleep(ap, 1); else break; } @@ -880,7 +807,7 @@ int ahci_reset_controller(struct ata_host *host) * reset must complete within 1 second, or * the hardware should be considered fried. 
*/ - tmp = ata_wait_register(mmio + HOST_CTL, HOST_RESET, + tmp = ata_wait_register(NULL, mmio + HOST_CTL, HOST_RESET, HOST_RESET, 10, 1000); if (tmp & HOST_RESET) { @@ -1260,7 +1187,7 @@ int ahci_kick_engine(struct ata_port *ap) writel(tmp, port_mmio + PORT_CMD); rc = 0; - tmp = ata_wait_register(port_mmio + PORT_CMD, + tmp = ata_wait_register(ap, port_mmio + PORT_CMD, PORT_CMD_CLO, PORT_CMD_CLO, 1, 500); if (tmp & PORT_CMD_CLO) rc = -EIO; @@ -1290,8 +1217,8 @@ static int ahci_exec_polled_cmd(struct ata_port *ap, int pmp, writel(1, port_mmio + PORT_CMD_ISSUE); if (timeout_msec) { - tmp = ata_wait_register(port_mmio + PORT_CMD_ISSUE, 0x1, 0x1, - 1, timeout_msec); + tmp = ata_wait_register(ap, port_mmio + PORT_CMD_ISSUE, + 0x1, 0x1, 1, timeout_msec); if (tmp & 0x1) { ahci_kick_engine(ap); return -EBUSY; @@ -1326,7 +1253,7 @@ int ahci_do_softreset(struct ata_link *link, unsigned int *class, /* issue the first D2H Register FIS */ msecs = 0; now = jiffies; - if (time_after(now, deadline)) + if (time_after(deadline, now)) msecs = jiffies_to_msecs(deadline - now); tf.ctl |= ATA_SRST; @@ -1338,7 +1265,7 @@ int ahci_do_softreset(struct ata_link *link, unsigned int *class, } /* spec says at least 5us, but be generous and sleep for 1ms */ - msleep(1); + ata_msleep(ap, 1); /* issue the second D2H Register FIS */ tf.ctl &= ~ATA_SRST; @@ -1668,15 +1595,10 @@ static void ahci_port_intr(struct ata_port *ap) if (unlikely(resetting)) status &= ~PORT_IRQ_BAD_PMP; - /* If we are getting PhyRdy, this is - * just a power state change, we should - * clear out this, plus the PhyRdy/Comm - * Wake bits from Serror - */ - if ((hpriv->flags & AHCI_HFLAG_NO_HOTPLUG) && - (status & PORT_IRQ_PHYRDY)) { + /* if LPM is enabled, PHYRDY doesn't mean anything */ + if (ap->link.lpm_policy > ATA_LPM_MAX_POWER) { status &= ~PORT_IRQ_PHYRDY; - ahci_scr_write(&ap->link, SCR_ERROR, ((1 << 16) | (1 << 18))); + ahci_scr_write(&ap->link, SCR_ERROR, SERR_PHYRDY_CHG); } if (unlikely(status & PORT_IRQ_ERROR)) { diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index c035b3d..7f77c67 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -68,7 +68,7 @@ #include #include "libata.h" - +#include "libata-transport.h" /* debounce timing parameters in msecs { interval, duration, timeout } */ const unsigned long sata_deb_timing_normal[] = { 5, 100, 2000 }; @@ -91,8 +91,6 @@ const struct ata_port_operations sata_port_ops = { static unsigned int ata_dev_init_params(struct ata_device *dev, u16 heads, u16 sectors); static unsigned int ata_dev_set_xfermode(struct ata_device *dev); -static unsigned int ata_dev_set_feature(struct ata_device *dev, - u8 enable, u8 feature); static void ata_dev_xfermask(struct ata_device *dev); static unsigned long ata_dev_blacklisted(const struct ata_device *dev); @@ -1017,7 +1015,7 @@ const char *ata_mode_string(unsigned long xfer_mask) return ""; } -static const char *sata_spd_string(unsigned int spd) +const char *sata_spd_string(unsigned int spd) { static const char * const spd_str[] = { "1.5 Gbps", @@ -1030,182 +1028,6 @@ static const char *sata_spd_string(unsigned int spd) return spd_str[spd - 1]; } -static int ata_dev_set_dipm(struct ata_device *dev, enum link_pm policy) -{ - struct ata_link *link = dev->link; - struct ata_port *ap = link->ap; - u32 scontrol; - unsigned int err_mask; - int rc; - - /* - * disallow DIPM for drivers which haven't set - * ATA_FLAG_IPM. 
This is because when DIPM is enabled, - * phy ready will be set in the interrupt status on - * state changes, which will cause some drivers to - * think there are errors - additionally drivers will - * need to disable hot plug. - */ - if (!(ap->flags & ATA_FLAG_IPM) || !ata_dev_enabled(dev)) { - ap->pm_policy = NOT_AVAILABLE; - return -EINVAL; - } - - /* - * For DIPM, we will only enable it for the - * min_power setting. - * - * Why? Because Disks are too stupid to know that - * If the host rejects a request to go to SLUMBER - * they should retry at PARTIAL, and instead it - * just would give up. So, for medium_power to - * work at all, we need to only allow HIPM. - */ - rc = sata_scr_read(link, SCR_CONTROL, &scontrol); - if (rc) - return rc; - - switch (policy) { - case MIN_POWER: - /* no restrictions on IPM transitions */ - scontrol &= ~(0x3 << 8); - rc = sata_scr_write(link, SCR_CONTROL, scontrol); - if (rc) - return rc; - - /* enable DIPM */ - if (dev->flags & ATA_DFLAG_DIPM) - err_mask = ata_dev_set_feature(dev, - SETFEATURES_SATA_ENABLE, SATA_DIPM); - break; - case MEDIUM_POWER: - /* allow IPM to PARTIAL */ - scontrol &= ~(0x1 << 8); - scontrol |= (0x2 << 8); - rc = sata_scr_write(link, SCR_CONTROL, scontrol); - if (rc) - return rc; - - /* - * we don't have to disable DIPM since IPM flags - * disallow transitions to SLUMBER, which effectively - * disable DIPM if it does not support PARTIAL - */ - break; - case NOT_AVAILABLE: - case MAX_PERFORMANCE: - /* disable all IPM transitions */ - scontrol |= (0x3 << 8); - rc = sata_scr_write(link, SCR_CONTROL, scontrol); - if (rc) - return rc; - - /* - * we don't have to disable DIPM since IPM flags - * disallow all transitions which effectively - * disable DIPM anyway. - */ - break; - } - - /* FIXME: handle SET FEATURES failure */ - (void) err_mask; - - return 0; -} - -/** - * ata_dev_enable_pm - enable SATA interface power management - * @dev: device to enable power management - * @policy: the link power management policy - * - * Enable SATA Interface power management. This will enable - * Device Interface Power Management (DIPM) for min_power - * policy, and then call driver specific callbacks for - * enabling Host Initiated Power management. - * - * Locking: Caller. - * Returns: -EINVAL if IPM is not supported, 0 otherwise. - */ -void ata_dev_enable_pm(struct ata_device *dev, enum link_pm policy) -{ - int rc = 0; - struct ata_port *ap = dev->link->ap; - - /* set HIPM first, then DIPM */ - if (ap->ops->enable_pm) - rc = ap->ops->enable_pm(ap, policy); - if (rc) - goto enable_pm_out; - rc = ata_dev_set_dipm(dev, policy); - -enable_pm_out: - if (rc) - ap->pm_policy = MAX_PERFORMANCE; - else - ap->pm_policy = policy; - return /* rc */; /* hopefully we can use 'rc' eventually */ -} - -#ifdef CONFIG_PM -/** - * ata_dev_disable_pm - disable SATA interface power management - * @dev: device to disable power management - * - * Disable SATA Interface power management. This will disable - * Device Interface Power Management (DIPM) without changing - * policy, call driver specific callbacks for disabling Host - * Initiated Power management. - * - * Locking: Caller. 
- * Returns: void - */ -static void ata_dev_disable_pm(struct ata_device *dev) -{ - struct ata_port *ap = dev->link->ap; - - ata_dev_set_dipm(dev, MAX_PERFORMANCE); - if (ap->ops->disable_pm) - ap->ops->disable_pm(ap); -} -#endif /* CONFIG_PM */ - -void ata_lpm_schedule(struct ata_port *ap, enum link_pm policy) -{ - ap->pm_policy = policy; - ap->link.eh_info.action |= ATA_EH_LPM; - ap->link.eh_info.flags |= ATA_EHI_NO_AUTOPSY; - ata_port_schedule_eh(ap); -} - -#ifdef CONFIG_PM -static void ata_lpm_enable(struct ata_host *host) -{ - struct ata_link *link; - struct ata_port *ap; - struct ata_device *dev; - int i; - - for (i = 0; i < host->n_ports; i++) { - ap = host->ports[i]; - ata_for_each_link(link, ap, EDGE) { - ata_for_each_dev(dev, link, ALL) - ata_dev_disable_pm(dev); - } - } -} - -static void ata_lpm_disable(struct ata_host *host) -{ - int i; - - for (i = 0; i < host->n_ports; i++) { - struct ata_port *ap = host->ports[i]; - ata_lpm_schedule(ap, ap->pm_policy); - } -} -#endif /* CONFIG_PM */ - /** * ata_dev_classify - determine device type based on ATA-spec signature * @tf: ATA taskfile register set for device to be identified @@ -1806,8 +1628,14 @@ unsigned ata_exec_internal_sg(struct ata_device *dev, } } + if (ap->ops->error_handler) + ata_eh_release(ap); + rc = wait_for_completion_timeout(&wait, msecs_to_jiffies(timeout)); + if (ap->ops->error_handler) + ata_eh_acquire(ap); + ata_sff_flush_pio_task(ap); if (!rc) { @@ -2564,13 +2392,6 @@ int ata_dev_configure(struct ata_device *dev) if (dev->flags & ATA_DFLAG_LBA48) dev->max_sectors = ATA_MAX_SECTORS_LBA48; - if (!(dev->horkage & ATA_HORKAGE_IPM)) { - if (ata_id_has_hipm(dev->id)) - dev->flags |= ATA_DFLAG_HIPM; - if (ata_id_has_dipm(dev->id)) - dev->flags |= ATA_DFLAG_DIPM; - } - /* Limit PATA drive on SATA cable bridge transfers to udma5, 200 sectors */ if (ata_dev_knobble(dev)) { @@ -2591,13 +2412,6 @@ int ata_dev_configure(struct ata_device *dev) dev->max_sectors = min_t(unsigned int, ATA_MAX_SECTORS_128, dev->max_sectors); - if (ata_dev_blacklisted(dev) & ATA_HORKAGE_IPM) { - dev->horkage |= ATA_HORKAGE_IPM; - - /* reset link pm_policy for this port to no pm */ - ap->pm_policy = MAX_PERFORMANCE; - } - if (ap->ops->dev_config) ap->ops->dev_config(dev); @@ -3596,7 +3410,7 @@ int ata_wait_ready(struct ata_link *link, unsigned long deadline, warned = 1; } - msleep(50); + ata_msleep(link->ap, 50); } } @@ -3617,7 +3431,7 @@ int ata_wait_ready(struct ata_link *link, unsigned long deadline, int ata_wait_after_reset(struct ata_link *link, unsigned long deadline, int (*check_ready)(struct ata_link *link)) { - msleep(ATA_WAIT_AFTER_RESET); + ata_msleep(link->ap, ATA_WAIT_AFTER_RESET); return ata_wait_ready(link, deadline, check_ready); } @@ -3628,7 +3442,7 @@ int ata_wait_after_reset(struct ata_link *link, unsigned long deadline, * @params: timing parameters { interval, duration, timeout } in msec * @deadline: deadline jiffies for the operation * -* Make sure SStatus of @link reaches stable state, determined by + * Make sure SStatus of @link reaches stable state, determined by * holding the same value where DET is not 1 for @duration polled * every @interval, before @timeout. Timeout constrains the * beginning of the stable state.
Because DET gets stuck at 1 on @@ -3665,7 +3479,7 @@ int sata_link_debounce(struct ata_link *link, const unsigned long *params, last_jiffies = jiffies; while (1) { - msleep(interval); + ata_msleep(link->ap, interval); if ((rc = sata_scr_read(link, SCR_STATUS, &cur))) return rc; cur &= 0xf; @@ -3730,7 +3544,7 @@ int sata_link_resume(struct ata_link *link, const unsigned long *params, * immediately after resuming. Delay 200ms before * debouncing. */ - msleep(200); + ata_msleep(link->ap, 200); /* is SControl restored correctly? */ if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol))) @@ -3760,6 +3574,72 @@ int sata_link_resume(struct ata_link *link, const unsigned long *params, } /** + * sata_link_scr_lpm - manipulate SControl IPM and SPM fields + * @link: ATA link to manipulate SControl for + * @policy: LPM policy to configure + * @spm_wakeup: initiate LPM transition to active state + * + * Manipulate the IPM field of the SControl register of @link + * according to @policy. If @policy is ATA_LPM_MAX_POWER and + * @spm_wakeup is %true, the SPM field is manipulated to wake up + * the link. This function also clears PHYRDY_CHG before + * returning. + * + * LOCKING: + * EH context. + * + * RETURNS: + * 0 on success, -errno otherwise. + */ +int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, + bool spm_wakeup) +{ + struct ata_eh_context *ehc = &link->eh_context; + bool woken_up = false; + u32 scontrol; + int rc; + + rc = sata_scr_read(link, SCR_CONTROL, &scontrol); + if (rc) + return rc; + + switch (policy) { + case ATA_LPM_MAX_POWER: + /* disable all LPM transitions */ + scontrol |= (0x3 << 8); + /* initiate transition to active state */ + if (spm_wakeup) { + scontrol |= (0x4 << 12); + woken_up = true; + } + break; + case ATA_LPM_MED_POWER: + /* allow LPM to PARTIAL */ + scontrol &= ~(0x1 << 8); + scontrol |= (0x2 << 8); + break; + case ATA_LPM_MIN_POWER: + /* no restrictions on LPM transitions */ + scontrol &= ~(0x3 << 8); + break; + default: + WARN_ON(1); + } + + rc = sata_scr_write(link, SCR_CONTROL, scontrol); + if (rc) + return rc; + + /* give the link time to transit out of LPM state */ + if (woken_up) + msleep(10); + + /* clear PHYRDY_CHG from SError */ + ehc->i.serror &= ~SERR_PHYRDY_CHG; + return sata_scr_write(link, SCR_ERROR, SERR_PHYRDY_CHG); +} + +/** * ata_std_prereset - prepare for reset * @link: ATA link to be reset * @deadline: deadline jiffies for the operation @@ -3868,7 +3748,7 @@ int sata_link_hardreset(struct ata_link *link, const unsigned long *timing, /* Couldn't find anything in SATA I/II specs, but AHCI-1.1 * 10.4.2 says at least 1 ms. */ - msleep(1); + ata_msleep(link->ap, 1); /* bring link back */ rc = sata_link_resume(link, timing, deadline); @@ -4551,6 +4431,7 @@ static unsigned int ata_dev_set_xfermode(struct ata_device *dev) DPRINTK("EXIT, err_mask=%x\n", err_mask); return err_mask; } + /** * ata_dev_set_feature - Issue SET FEATURES - SATA FEATURES * @dev: Device to which command will be sent * @@ -4566,8 +4447,7 @@ static unsigned int ata_dev_set_xfermode(struct ata_device *dev) * RETURNS: * 0 on success, AC_ERR_* mask otherwise.
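For reference, the IPM field sata_link_scr_lpm() manipulates above is SControl bits 9:8 and SPM is bits 15:12; the mapping works out to the following (a summary of the function, not additional behavior):

    /*
     * ATA_LPM_MAX_POWER -> IPM = 0x3 (PARTIAL and SLUMBER both disabled),
     *                      plus SPM = 0x4 when @spm_wakeup to kick the
     *                      link back to the active state
     * ATA_LPM_MED_POWER -> IPM = 0x2 (PARTIAL allowed, SLUMBER disabled)
     * ATA_LPM_MIN_POWER -> IPM = 0x0 (no restrictions)
     */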
*/ -static unsigned int ata_dev_set_feature(struct ata_device *dev, u8 enable, - u8 feature) +unsigned int ata_dev_set_feature(struct ata_device *dev, u8 enable, u8 feature) { struct ata_taskfile tf; unsigned int err_mask; @@ -4943,8 +4823,13 @@ static void ata_verify_xfer(struct ata_queued_cmd *qc) * ata_qc_complete - Complete an active ATA command * @qc: Command to complete * - * Indicate to the mid and upper layers that an ATA - * command has completed, with either an ok or not-ok status. + * Indicate to the mid and upper layers that an ATA command has + * completed, with either an ok or not-ok status. + * + * Refrain from calling this function multiple times when + * successfully completing multiple NCQ commands. + * ata_qc_complete_multiple() should be used instead, which will + * properly update IRQ expect state. * * LOCKING: * spin_lock_irqsave(host lock) @@ -5037,6 +4922,10 @@ void ata_qc_complete(struct ata_queued_cmd *qc) * requests normally. ap->qc_active and @qc_active are compared * and commands are completed accordingly. * + * Always use this function when completing multiple NCQ commands + * from IRQ handlers instead of calling ata_qc_complete() + * multiple times to keep IRQ expect status properly in sync. + * * LOCKING: * spin_lock_irqsave(host lock) * @@ -5418,15 +5307,21 @@ static int ata_host_request_pm(struct ata_host *host, pm_message_t mesg, */ int ata_host_suspend(struct ata_host *host, pm_message_t mesg) { + unsigned int ehi_flags = ATA_EHI_QUIET; int rc; /* - * disable link pm on all ports before requesting - * any pm activity + * On some hardware, the device fails to respond after being + * spun down for suspend. As the device won't be used before being + * resumed, we don't need to touch the device. Ask EH to skip + * the usual stuff and proceed directly to suspend.
+ * + * http://thread.gmane.org/gmane.linux.ide/46764 */ - ata_lpm_enable(host); + if (mesg.event == PM_EVENT_SUSPEND) + ehi_flags |= ATA_EHI_NO_AUTOPSY | ATA_EHI_NO_RECOVERY; - rc = ata_host_request_pm(host, mesg, 0, ATA_EHI_QUIET, 1); + rc = ata_host_request_pm(host, mesg, 0, ehi_flags, 1); if (rc == 0) host->dev->power.power_state = mesg; return rc; @@ -5448,9 +5343,6 @@ void ata_host_resume(struct ata_host *host) ata_host_request_pm(host, PMSG_ON, ATA_EH_RESET, ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET, 0); host->dev->power.power_state = PMSG_ON; - - /* reenable link pm */ - ata_lpm_disable(host); } #endif @@ -5505,7 +5397,8 @@ void ata_link_init(struct ata_port *ap, struct ata_link *link, int pmp) int i; /* clear everything except for devices */ - memset(link, 0, offsetof(struct ata_link, device[0])); + memset((void *)link + ATA_LINK_CLEAR_BEGIN, 0, + ATA_LINK_CLEAR_END - ATA_LINK_CLEAR_BEGIN); link->ap = ap; link->pmp = pmp; @@ -5579,7 +5472,7 @@ struct ata_port *ata_port_alloc(struct ata_host *host) ap = kzalloc(sizeof(*ap), GFP_KERNEL); if (!ap) return NULL; - + ap->pflags |= ATA_PFLAG_INITIALIZING; ap->lock = &host->lock; ap->print_id = -1; @@ -5683,6 +5576,7 @@ struct ata_host *ata_host_alloc(struct device *dev, int max_ports) dev_set_drvdata(dev, host); spin_lock_init(&host->lock); + mutex_init(&host->eh_mutex); host->dev = dev; host->n_ports = max_ports; @@ -5980,6 +5874,7 @@ void ata_host_init(struct ata_host *host, struct device *dev, unsigned long flags, struct ata_port_operations *ops) { spin_lock_init(&host->lock); + mutex_init(&host->eh_mutex); host->dev = dev; host->flags = flags; host->ops = ops; @@ -6010,7 +5905,7 @@ static void async_port_probe(void *data, async_cookie_t cookie) spin_lock_irqsave(ap->lock, flags); ehi->probe_mask |= ATA_ALL_DEVICES; - ehi->action |= ATA_EH_RESET | ATA_EH_LPM; + ehi->action |= ATA_EH_RESET; ehi->flags |= ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET; ap->pflags &= ~ATA_PFLAG_INITIALIZING; @@ -6081,9 +5976,18 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht) for (i = 0; i < host->n_ports; i++) host->ports[i]->print_id = ata_print_id++; + + /* Create associated sysfs transport objects */ + for (i = 0; i < host->n_ports; i++) { + rc = ata_tport_add(host->dev,host->ports[i]); + if (rc) { + goto err_tadd; + } + } + rc = ata_scsi_add_hosts(host, sht); if (rc) - return rc; + goto err_tadd; /* associate with ACPI nodes */ ata_acpi_associate(host); @@ -6124,6 +6028,13 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht) } return 0; + + err_tadd: + while (--i >= 0) { + ata_tport_delete(host->ports[i]); + } + return rc; + } /** @@ -6214,6 +6125,13 @@ static void ata_port_detach(struct ata_port *ap) cancel_rearming_delayed_work(&ap->hotplug_task); skip_eh: + if (ap->pmp_link) { + int i; + for (i = 0; i < SATA_PMP_MAX_PORTS; i++) + ata_tlink_delete(&ap->pmp_link[i]); + } + ata_tport_delete(ap); + /* remove the associated SCSI host */ scsi_remove_host(ap->scsi_host); } @@ -6530,7 +6448,7 @@ static void __init ata_parse_force_param(void) static int __init ata_init(void) { - int rc = -ENOMEM; + int rc; ata_parse_force_param(); @@ -6540,12 +6458,25 @@ static int __init ata_init(void) return rc; } + libata_transport_init(); + ata_scsi_transport_template = ata_attach_transport(); + if (!ata_scsi_transport_template) { + ata_sff_exit(); + rc = -ENOMEM; + goto err_out; + } + printk(KERN_DEBUG "libata version " DRV_VERSION " loaded.\n"); return 0; + +err_out: + return rc; } static void __exit ata_exit(void) { + 
ata_release_transport(ata_scsi_transport_template); + libata_transport_exit(); ata_sff_exit(); kfree(ata_force_tbl); } @@ -6561,7 +6492,35 @@ int ata_ratelimit(void) } /** + * ata_msleep - ATA EH owner aware msleep + * @ap: ATA port to attribute the sleep to + * @msecs: duration to sleep in milliseconds + * + * Sleeps @msecs. If the current task is owner of @ap's EH, the + * ownership is released before going to sleep and reacquired + * after the sleep is complete. IOW, other ports sharing the + * @ap->host will be allowed to own the EH while this task is + * sleeping. + * + * LOCKING: + * Might sleep. + */ +void ata_msleep(struct ata_port *ap, unsigned int msecs) +{ + bool owns_eh = ap && ap->host->eh_owner == current; + + if (owns_eh) + ata_eh_release(ap); + + msleep(msecs); + + if (owns_eh) + ata_eh_acquire(ap); +} + +/** * ata_wait_register - wait until register value changes + * @ap: ATA port to wait register for, can be NULL * @reg: IO-mapped register * @mask: Mask to apply to read register value * @val: Wait condition @@ -6583,7 +6542,7 @@ int ata_ratelimit(void) * RETURNS: * The final register value. */ -u32 ata_wait_register(void __iomem *reg, u32 mask, u32 val, +u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask, u32 val, unsigned long interval, unsigned long timeout) { unsigned long deadline; @@ -6598,7 +6557,7 @@ u32 ata_wait_register(void __iomem *reg, u32 mask, u32 val, deadline = ata_deadline(jiffies, timeout); while ((tmp & mask) == val && time_before(jiffies, deadline)) { - msleep(interval); + ata_msleep(ap, interval); tmp = ioread32(reg); } @@ -6674,6 +6633,7 @@ EXPORT_SYMBOL_GPL(sata_set_spd); EXPORT_SYMBOL_GPL(ata_wait_after_reset); EXPORT_SYMBOL_GPL(sata_link_debounce); EXPORT_SYMBOL_GPL(sata_link_resume); +EXPORT_SYMBOL_GPL(sata_link_scr_lpm); EXPORT_SYMBOL_GPL(ata_std_prereset); EXPORT_SYMBOL_GPL(sata_link_hardreset); EXPORT_SYMBOL_GPL(sata_std_hardreset); @@ -6681,6 +6641,7 @@ EXPORT_SYMBOL_GPL(ata_std_postreset); EXPORT_SYMBOL_GPL(ata_dev_classify); EXPORT_SYMBOL_GPL(ata_dev_pair); EXPORT_SYMBOL_GPL(ata_ratelimit); +EXPORT_SYMBOL_GPL(ata_msleep); EXPORT_SYMBOL_GPL(ata_wait_register); EXPORT_SYMBOL_GPL(ata_scsi_queuecmd); EXPORT_SYMBOL_GPL(ata_scsi_slave_config); diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index c9ae299..5e59050 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -57,6 +57,7 @@ enum { /* error flags */ ATA_EFLAG_IS_IO = (1 << 0), ATA_EFLAG_DUBIOUS_XFER = (1 << 1), + ATA_EFLAG_OLD_ER = (1 << 31), /* error categories */ ATA_ECAT_NONE = 0, @@ -396,14 +397,9 @@ static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering) return NULL; } -static void ata_ering_clear(struct ata_ering *ering) -{ - memset(ering, 0, sizeof(*ering)); -} - -static int ata_ering_map(struct ata_ering *ering, - int (*map_fn)(struct ata_ering_entry *, void *), - void *arg) +int ata_ering_map(struct ata_ering *ering, + int (*map_fn)(struct ata_ering_entry *, void *), + void *arg) { int idx, rc = 0; struct ata_ering_entry *ent; @@ -422,6 +418,17 @@ static int ata_ering_map(struct ata_ering *ering, return rc; } +int ata_ering_clear_cb(struct ata_ering_entry *ent, void *void_arg) +{ + ent->eflags |= ATA_EFLAG_OLD_ER; + return 0; +} + +static void ata_ering_clear(struct ata_ering *ering) +{ + ata_ering_map(ering, ata_ering_clear_cb, NULL); +} + static unsigned int ata_eh_dev_action(struct ata_device *dev) { struct ata_eh_context *ehc = &dev->link->eh_context; @@ -456,6 +463,41 @@ static void ata_eh_clear_action(struct 
ata_link *link, struct ata_device *dev, } /** + * ata_eh_acquire - acquire EH ownership + * @ap: ATA port to acquire EH ownership for + * + * Acquire EH ownership for @ap. This is the basic exclusion + * mechanism for ports sharing a host. Only one port hanging off + * the same host can claim the ownership of EH. + * + * LOCKING: + * EH context. + */ +void ata_eh_acquire(struct ata_port *ap) +{ + mutex_lock(&ap->host->eh_mutex); + WARN_ON_ONCE(ap->host->eh_owner); + ap->host->eh_owner = current; +} + +/** + * ata_eh_release - release EH ownership + * @ap: ATA port to release EH ownership for + * + * Release EH ownership for @ap. The caller must + * have acquired EH ownership using ata_eh_acquire() previously. + * + * LOCKING: + * EH context. + */ +void ata_eh_release(struct ata_port *ap) +{ + WARN_ON_ONCE(ap->host->eh_owner != current); + ap->host->eh_owner = NULL; + mutex_unlock(&ap->host->eh_mutex); +} + +/** * ata_scsi_timed_out - SCSI layer time out callback * @cmd: timed out SCSI command * @@ -572,19 +614,19 @@ void ata_scsi_error(struct Scsi_Host *host) int nr_timedout = 0; spin_lock_irqsave(ap->lock, flags); - + /* This must occur under the ap->lock as we don't want a polled recovery to race the real interrupt handler - + The lost_interrupt handler checks for any completed but non-notified command and completes much like an IRQ handler. - + We then fall into the error recovery code which will treat this as if normal completion won the race */ if (ap->ops->lost_interrupt) ap->ops->lost_interrupt(ap); - + list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) { struct ata_queued_cmd *qc; @@ -628,15 +670,17 @@ void ata_scsi_error(struct Scsi_Host *host) ap->eh_tries = ATA_EH_MAX_TRIES; } else spin_unlock_wait(ap->lock); - + /* If we timed out racing normal completion and there is nothing to recover (nr_timedout == 0), why exactly are we doing error recovery? */ - repeat: /* invoke error handler */ if (ap->ops->error_handler) { struct ata_link *link; + /* acquire EH ownership */ + ata_eh_acquire(ap); + repeat: /* kill fast drain timer */ del_timer_sync(&ap->fastdrain_timer); @@ -711,6 +755,7 @@ void ata_scsi_error(struct Scsi_Host *host) host->host_eh_scheduled = 0; spin_unlock_irqrestore(ap->lock, flags); + ata_eh_release(ap); } else { WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL); ap->ops->eng_timeout(ap); @@ -772,7 +817,7 @@ void ata_port_wait_eh(struct ata_port *ap) /* make sure SCSI EH is complete */ if (scsi_host_in_recovery(ap->scsi_host)) { - msleep(10); + ata_msleep(ap, 10); goto retry; } } @@ -1573,9 +1618,9 @@ static void ata_eh_analyze_serror(struct ata_link *link) * host links. For disabled PMP links, only N bit is * considered as X bit is left at 1 for link plugging.
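ata_eh_acquire()/ata_eh_release() plus ata_msleep() amount to a simple discipline: one port per host runs EH at a time, and any long wait hands the mutex back to sibling ports. A minimal sketch of the pattern as the EH core uses it (the helper itself is hypothetical; the caller is assumed to already own EH):

    static void example_eh_wait(struct ata_port *ap)
    {
            /* short delay: ata_msleep() drops and retakes EH ownership */
            ata_msleep(ap, 10);

            /* longer wait: release explicitly around the sleep */
            ata_eh_release(ap);
            schedule_timeout_uninterruptible(msecs_to_jiffies(500));
            ata_eh_acquire(ap);
    }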
*/ - hotplug_mask = 0; - - if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link)) + if (link->lpm_policy != ATA_LPM_MAX_POWER) + hotplug_mask = 0; /* hotplug doesn't work w/ LPM */ + else if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link)) hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG; else hotplug_mask = SERR_PHYRDY_CHG; @@ -1755,7 +1800,7 @@ static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg) struct speed_down_verdict_arg *arg = void_arg; int cat; - if (ent->timestamp < arg->since) + if ((ent->eflags & ATA_EFLAG_OLD_ER) || (ent->timestamp < arg->since)) return -1; cat = ata_eh_categorize_error(ent->eflags, ent->err_mask, @@ -2777,8 +2822,9 @@ int ata_eh_reset(struct ata_link *link, int classify, ata_eh_done(link, NULL, ATA_EH_RESET); if (slave) ata_eh_done(slave, NULL, ATA_EH_RESET); - ehc->last_reset = jiffies; /* update to completion time */ + ehc->last_reset = jiffies; /* update to completion time */ ehc->i.action |= ATA_EH_REVALIDATE; + link->lpm_policy = ATA_LPM_UNKNOWN; /* reset LPM state */ rc = 0; out: @@ -2810,8 +2856,10 @@ int ata_eh_reset(struct ata_link *link, int classify, "reset failed (errno=%d), retrying in %u secs\n", rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000)); + ata_eh_release(ap); while (delta) delta = schedule_timeout_uninterruptible(delta); + ata_eh_acquire(ap); } if (try == max_tries - 1) { @@ -3204,6 +3252,124 @@ static int ata_eh_maybe_retry_flush(struct ata_device *dev) return rc; } +/** + * ata_eh_set_lpm - configure SATA interface power management + * @link: link to configure power management + * @policy: the link power management policy + * @r_failed_dev: out parameter for failed device + * + * Enable SATA Interface power management. This will enable + * Device Interface Power Management (DIPM) for min_power + * policy, and then call driver specific callbacks for + * enabling Host Initiated Power management. + * + * LOCKING: + * EH context. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, + struct ata_device **r_failed_dev) +{ + struct ata_port *ap = ata_is_host_link(link) ? link->ap : NULL; + struct ata_eh_context *ehc = &link->eh_context; + struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL; + unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM; + unsigned int err_mask; + int rc; + + /* if the link or host doesn't do LPM, noop */ + if ((link->flags & ATA_LFLAG_NO_LPM) || (ap && !ap->ops->set_lpm)) + return 0; + + /* + * DIPM is enabled only for MIN_POWER as some devices + * misbehave when the host NACKs transition to SLUMBER. Order + * device and link configurations such that the host always + * allows DIPM requests. 
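Restated as an outline, the ordering the function below enforces (a summary of the code that follows, not additional behavior):

    /*
     * 1. SET FEATURES: disable DIPM on each device first whenever the
     *    target policy is anything but ATA_LPM_MIN_POWER;
     * 2. reconfigure the host side via ->set_lpm(), or
     *    sata_pmp_set_lpm() for links behind a port multiplier;
     * 3. SET FEATURES: re-enable DIPM only for ATA_LPM_MIN_POWER, once
     *    the host is guaranteed to accept the device's transitions.
     */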
+ */ + ata_for_each_dev(dev, link, ENABLED) { + bool hipm = ata_id_has_hipm(dev->id); + bool dipm = ata_id_has_dipm(dev->id); + + /* find the first enabled and LPM enabled devices */ + if (!link_dev) + link_dev = dev; + + if (!lpm_dev && (hipm || dipm)) + lpm_dev = dev; + + hints &= ~ATA_LPM_EMPTY; + if (!hipm) + hints &= ~ATA_LPM_HIPM; + + /* disable DIPM before changing link config */ + if (policy != ATA_LPM_MIN_POWER && dipm) { + err_mask = ata_dev_set_feature(dev, + SETFEATURES_SATA_DISABLE, SATA_DIPM); + if (err_mask && err_mask != AC_ERR_DEV) { + ata_dev_printk(dev, KERN_WARNING, + "failed to disable DIPM, Emask 0x%x\n", + err_mask); + rc = -EIO; + goto fail; + } + } + } + + if (ap) { + rc = ap->ops->set_lpm(link, policy, hints); + if (!rc && ap->slave_link) + rc = ap->ops->set_lpm(ap->slave_link, policy, hints); + } else + rc = sata_pmp_set_lpm(link, policy, hints); + + /* + * Attribute link config failure to the first (LPM) enabled + * device on the link. + */ + if (rc) { + if (rc == -EOPNOTSUPP) { + link->flags |= ATA_LFLAG_NO_LPM; + return 0; + } + dev = lpm_dev ? lpm_dev : link_dev; + goto fail; + } + + /* host config updated, enable DIPM if transitioning to MIN_POWER */ + ata_for_each_dev(dev, link, ENABLED) { + if (policy == ATA_LPM_MIN_POWER && ata_id_has_dipm(dev->id)) { + err_mask = ata_dev_set_feature(dev, + SETFEATURES_SATA_ENABLE, SATA_DIPM); + if (err_mask && err_mask != AC_ERR_DEV) { + ata_dev_printk(dev, KERN_WARNING, + "failed to enable DIPM, Emask 0x%x\n", + err_mask); + rc = -EIO; + goto fail; + } + } + } + + link->lpm_policy = policy; + if (ap && ap->slave_link) + ap->slave_link->lpm_policy = policy; + return 0; + +fail: + /* if no device or only one more chance is left, disable LPM */ + if (!dev || ehc->tries[dev->devno] <= 2) { + ata_link_printk(link, KERN_WARNING, + "disabling LPM on the link\n"); + link->flags |= ATA_LFLAG_NO_LPM; + } + if (r_failed_dev) + *r_failed_dev = dev; + return rc; +} + static int ata_link_nr_enabled(struct ata_link *link) { struct ata_device *dev; @@ -3235,6 +3401,10 @@ static int ata_eh_skip_recovery(struct ata_link *link) if (link->flags & ATA_LFLAG_DISABLED) return 1; + /* skip if explicitly requested */ + if (ehc->i.flags & ATA_EHI_NO_RECOVERY) + return 1; + /* thaw frozen port and recover failed devices */ if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link)) return 0; @@ -3284,6 +3454,16 @@ static int ata_eh_schedule_probe(struct ata_device *dev) ehc->saved_xfer_mode[dev->devno] = 0; ehc->saved_ncq_enabled &= ~(1 << dev->devno); + /* the link maybe in a deep sleep, wake it up */ + if (link->lpm_policy > ATA_LPM_MAX_POWER) { + if (ata_is_host_link(link)) + link->ap->ops->set_lpm(link, ATA_LPM_MAX_POWER, + ATA_LPM_EMPTY); + else + sata_pmp_set_lpm(link, ATA_LPM_MAX_POWER, + ATA_LPM_EMPTY); + } + /* Record and count probe trials on the ering. The specific * error mask used is irrelevant. 
Because a successful device * detection clears the ering, this count accumulates only if @@ -3385,8 +3565,7 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, { struct ata_link *link; struct ata_device *dev; - int nr_failed_devs; - int rc; + int rc, nr_fails; unsigned long flags, deadline; DPRINTK("ENTER\n"); @@ -3427,7 +3606,6 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, retry: rc = 0; - nr_failed_devs = 0; /* if UNLOADING, finish immediately */ if (ap->pflags & ATA_PFLAG_UNLOADING) @@ -3497,8 +3675,10 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, if (time_before_eq(deadline, now)) break; + ata_eh_release(ap); deadline = wait_for_completion_timeout(&ap->park_req_pending, deadline - now); + ata_eh_acquire(ap); } while (deadline); ata_for_each_link(link, ap, EDGE) { ata_for_each_dev(dev, link, ALL) { @@ -3512,13 +3692,17 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, } /* the rest */ - ata_for_each_link(link, ap, EDGE) { + nr_fails = 0; + ata_for_each_link(link, ap, PMP_FIRST) { struct ata_eh_context *ehc = &link->eh_context; + if (sata_pmp_attached(ap) && ata_is_host_link(link)) + goto config_lpm; + /* revalidate existing devices and attach new ones */ rc = ata_eh_revalidate_and_attach(link, &dev); if (rc) - goto dev_fail; + goto rest_fail; /* if PMP got attached, return, pmp EH will take care of it */ if (link->device->class == ATA_DEV_PMP) { @@ -3530,7 +3714,7 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, if (ehc->i.flags & ATA_EHI_SETMODE) { rc = ata_set_mode(link, &dev); if (rc) - goto dev_fail; + goto rest_fail; ehc->i.flags &= ~ATA_EHI_SETMODE; } @@ -3543,7 +3727,7 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, continue; rc = atapi_eh_clear_ua(dev); if (rc) - goto dev_fail; + goto rest_fail; } } @@ -3553,21 +3737,25 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, continue; rc = ata_eh_maybe_retry_flush(dev); if (rc) - goto dev_fail; + goto rest_fail; } + config_lpm: /* configure link power saving */ - if (ehc->i.action & ATA_EH_LPM) - ata_for_each_dev(dev, link, ALL) - ata_dev_enable_pm(dev, ap->pm_policy); + if (link->lpm_policy != ap->target_lpm_policy) { + rc = ata_eh_set_lpm(link, ap->target_lpm_policy, &dev); + if (rc) + goto rest_fail; + } /* this link is okay now */ ehc->i.flags = 0; continue; -dev_fail: - nr_failed_devs++; - ata_eh_handle_dev_fail(dev, rc); + rest_fail: + nr_fails++; + if (dev) + ata_eh_handle_dev_fail(dev, rc); if (ap->pflags & ATA_PFLAG_FROZEN) { /* PMP reset requires working host port. @@ -3579,7 +3767,7 @@ dev_fail: } } - if (nr_failed_devs) + if (nr_fails) goto retry; out: diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c index 224faab..3120596 100644 --- a/drivers/ata/libata-pmp.c +++ b/drivers/ata/libata-pmp.c @@ -11,6 +11,7 @@ #include #include #include "libata.h" +#include "libata-transport.h" const struct ata_port_operations sata_pmp_port_ops = { .inherits = &sata_port_ops, @@ -185,6 +186,27 @@ int sata_pmp_scr_write(struct ata_link *link, int reg, u32 val) } /** + * sata_pmp_set_lpm - configure LPM for a PMP link + * @link: PMP link to configure LPM for + * @policy: target LPM policy + * @hints: LPM hints + * + * Configure LPM for @link. This function will contain any PMP + * specific workarounds if necessary. + * + * LOCKING: + * EH context. + * + * RETURNS: + * 0 on success, -errno on failure. 
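+ * + * For now this is a thin wrapper that forwards to sata_link_scr_lpm() + * with link wakeup enabled; PMP-specific workarounds would slot in here.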
+ */ +int sata_pmp_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, + unsigned hints) +{ + return sata_link_scr_lpm(link, policy, true); +} + +/** * sata_pmp_read_gscr - read GSCR block of SATA PMP * @dev: PMP device * @gscr: buffer to read GSCR block into @@ -312,10 +334,10 @@ static int sata_pmp_configure(struct ata_device *dev, int print_info) return rc; } -static int sata_pmp_init_links(struct ata_port *ap, int nr_ports) +static int sata_pmp_init_links (struct ata_port *ap, int nr_ports) { struct ata_link *pmp_link = ap->pmp_link; - int i; + int i, err; if (!pmp_link) { pmp_link = kzalloc(sizeof(pmp_link[0]) * SATA_PMP_MAX_PORTS, @@ -327,6 +349,13 @@ static int sata_pmp_init_links(struct ata_port *ap, int nr_ports) ata_link_init(ap, &pmp_link[i], i); ap->pmp_link = pmp_link; + + for (i = 0; i < SATA_PMP_MAX_PORTS; i++) { + err = ata_tlink_add(&pmp_link[i]); + if (err) { + goto err_tlink; + } + } } for (i = 0; i < nr_ports; i++) { @@ -339,6 +368,12 @@ static int sata_pmp_init_links(struct ata_port *ap, int nr_ports) } return 0; + err_tlink: + while (--i >= 0) + ata_tlink_delete(&pmp_link[i]); + kfree(pmp_link); + ap->pmp_link = NULL; + return err; } static void sata_pmp_quirks(struct ata_port *ap) @@ -351,6 +386,9 @@ static void sata_pmp_quirks(struct ata_port *ap) if (vendor == 0x1095 && devid == 0x3726) { /* sil3726 quirks */ ata_for_each_link(link, ap, EDGE) { + /* link reports offline after LPM */ + link->flags |= ATA_LFLAG_NO_LPM; + /* Class code report is unreliable and SRST * times out under certain configurations. */ @@ -366,6 +404,9 @@ static void sata_pmp_quirks(struct ata_port *ap) } else if (vendor == 0x1095 && devid == 0x4723) { /* sil4723 quirks */ ata_for_each_link(link, ap, EDGE) { + /* link reports offline after LPM */ + link->flags |= ATA_LFLAG_NO_LPM; + /* class code report is unreliable */ if (link->pmp < 2) link->flags |= ATA_LFLAG_ASSUME_ATA; @@ -378,6 +419,9 @@ static void sata_pmp_quirks(struct ata_port *ap) } else if (vendor == 0x1095 && devid == 0x4726) { /* sil4726 quirks */ ata_for_each_link(link, ap, EDGE) { + /* link reports offline after LPM */ + link->flags |= ATA_LFLAG_NO_LPM; + /* Class code report is unreliable and SRST * times out under certain configurations. * Config device can be at port 0 or 5 and @@ -938,15 +982,25 @@ static int sata_pmp_eh_recover(struct ata_port *ap) if (rc) goto link_fail; - /* Connection status might have changed while resetting other - * links, check SATA_PMP_GSCR_ERROR before returning. - */ - /* clear SNotification */ rc = sata_scr_read(&ap->link, SCR_NOTIFICATION, &sntf); if (rc == 0) sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf); + /* + * If LPM is active on any fan-out port, hotplug wouldn't + * work. Return w/ PHY event notification disabled. + */ + ata_for_each_link(link, ap, EDGE) + if (link->lpm_policy > ATA_LPM_MAX_POWER) + return 0; + + /* + * Connection status might have changed while resetting other + * links, enable notification and check SATA_PMP_GSCR_ERROR + * before returning. 
+ */ + /* enable notification */ if (pmp_dev->flags & ATA_DFLAG_AN) { gscr[SATA_PMP_GSCR_FEAT_EN] |= SATA_PMP_FEAT_NOTIFY; diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index a89172c..d050e07 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -51,8 +51,8 @@ #include #include "libata.h" +#include "libata-transport.h" -#define SECTOR_SIZE 512 #define ATA_SCSI_RBUF_SIZE 4096 static DEFINE_SPINLOCK(ata_scsi_rbuf_lock); @@ -64,9 +64,6 @@ static struct ata_device *__ata_scsi_find_dev(struct ata_port *ap, const struct scsi_device *scsidev); static struct ata_device *ata_scsi_find_dev(struct ata_port *ap, const struct scsi_device *scsidev); -static int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel, - unsigned int id, unsigned int lun); - #define RW_RECOVERY_MPAGE 0x1 #define RW_RECOVERY_MPAGE_LEN 12 @@ -106,83 +103,55 @@ static const u8 def_control_mpage[CONTROL_MPAGE_LEN] = { 0, 30 /* extended self test time, see 05-359r1 */ }; -/* - * libata transport template. libata doesn't do real transport stuff. - * It just needs the eh_timed_out hook. - */ -static struct scsi_transport_template ata_scsi_transport_template = { - .eh_strategy_handler = ata_scsi_error, - .eh_timed_out = ata_scsi_timed_out, - .user_scan = ata_scsi_user_scan, -}; - - -static const struct { - enum link_pm value; - const char *name; -} link_pm_policy[] = { - { NOT_AVAILABLE, "max_performance" }, - { MIN_POWER, "min_power" }, - { MAX_PERFORMANCE, "max_performance" }, - { MEDIUM_POWER, "medium_power" }, +static const char *ata_lpm_policy_names[] = { + [ATA_LPM_UNKNOWN] = "max_performance", + [ATA_LPM_MAX_POWER] = "max_performance", + [ATA_LPM_MED_POWER] = "medium_power", + [ATA_LPM_MIN_POWER] = "min_power", }; -static const char *ata_scsi_lpm_get(enum link_pm policy) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(link_pm_policy); i++) - if (link_pm_policy[i].value == policy) - return link_pm_policy[i].name; - - return NULL; -} - -static ssize_t ata_scsi_lpm_put(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t ata_scsi_lpm_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { struct Scsi_Host *shost = class_to_shost(dev); struct ata_port *ap = ata_shost_to_port(shost); - enum link_pm policy = 0; - int i; + enum ata_lpm_policy policy; + unsigned long flags; - /* - * we are skipping array location 0 on purpose - this - * is because a value of NOT_AVAILABLE is displayed - * to the user as max_performance, but when the user - * writes "max_performance", they actually want the - * value to match MAX_PERFORMANCE. 
- */ - for (i = 1; i < ARRAY_SIZE(link_pm_policy); i++) { - const int len = strlen(link_pm_policy[i].name); - if (strncmp(link_pm_policy[i].name, buf, len) == 0) { - policy = link_pm_policy[i].value; + /* UNKNOWN is internal state, iterate from MAX_POWER */ + for (policy = ATA_LPM_MAX_POWER; + policy < ARRAY_SIZE(ata_lpm_policy_names); policy++) { + const char *name = ata_lpm_policy_names[policy]; + + if (strncmp(name, buf, strlen(name)) == 0) break; - } } - if (!policy) + if (policy == ARRAY_SIZE(ata_lpm_policy_names)) return -EINVAL; - ata_lpm_schedule(ap, policy); + spin_lock_irqsave(ap->lock, flags); + ap->target_lpm_policy = policy; + ata_port_schedule_eh(ap); + spin_unlock_irqrestore(ap->lock, flags); + return count; } -static ssize_t -ata_scsi_lpm_show(struct device *dev, struct device_attribute *attr, char *buf) +static ssize_t ata_scsi_lpm_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct Scsi_Host *shost = class_to_shost(dev); struct ata_port *ap = ata_shost_to_port(shost); - const char *policy = - ata_scsi_lpm_get(ap->pm_policy); - if (!policy) + if (ap->target_lpm_policy >= ARRAY_SIZE(ata_lpm_policy_names)) return -EINVAL; - return snprintf(buf, 23, "%s\n", policy); + return snprintf(buf, PAGE_SIZE, "%s\n", + ata_lpm_policy_names[ap->target_lpm_policy]); } DEVICE_ATTR(link_power_management_policy, S_IRUGO | S_IWUSR, - ata_scsi_lpm_show, ata_scsi_lpm_put); + ata_scsi_lpm_show, ata_scsi_lpm_store); EXPORT_SYMBOL_GPL(dev_attr_link_power_management_policy); static ssize_t ata_scsi_park_show(struct device *device, @@ -516,7 +485,7 @@ int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg) memset(scsi_cmd, 0, sizeof(scsi_cmd)); if (args[3]) { - argsize = SECTOR_SIZE * args[3]; + argsize = ATA_SECT_SIZE * args[3]; argbuf = kmalloc(argsize, GFP_KERNEL); if (argbuf == NULL) { rc = -ENOMEM; @@ -1150,8 +1119,9 @@ static int ata_scsi_dev_config(struct scsi_device *sdev, blk_queue_dma_drain(q, atapi_drain_needed, buf, ATAPI_MAX_DRAIN); } else { /* ATA devices must be sector aligned */ + sdev->sector_size = ata_id_logical_sector_size(dev->id); blk_queue_update_dma_alignment(sdev->request_queue, - ATA_SECT_SIZE - 1); + sdev->sector_size - 1); sdev->manage_start_stop = 1; } @@ -1166,6 +1136,7 @@ static int ata_scsi_dev_config(struct scsi_device *sdev, scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, depth); } + dev->sdev = sdev; return 0; } @@ -1696,7 +1667,7 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc) goto nothing_to_do; qc->flags |= ATA_QCFLAG_IO; - qc->nbytes = n_block * ATA_SECT_SIZE; + qc->nbytes = n_block * scmd->device->sector_size; rc = ata_build_rw_tf(&qc->tf, qc->dev, block, n_block, tf_flags, qc->tag); @@ -2001,6 +1972,7 @@ static unsigned int ata_scsiop_inq_00(struct ata_scsi_args *args, u8 *rbuf) 0x89, /* page 0x89, ata info page */ 0xb0, /* page 0xb0, block limits page */ 0xb1, /* page 0xb1, block device characteristics page */ + 0xb2, /* page 0xb2, thin provisioning page */ }; rbuf[3] = sizeof(pages); /* number of supported VPD pages */ @@ -2123,7 +2095,7 @@ static unsigned int ata_scsiop_inq_89(struct ata_scsi_args *args, u8 *rbuf) static unsigned int ata_scsiop_inq_b0(struct ata_scsi_args *args, u8 *rbuf) { - u32 min_io_sectors; + u16 min_io_sectors; rbuf[1] = 0xb0; rbuf[3] = 0x3c; /* required VPD size with unmap support */ @@ -2135,10 +2107,7 @@ static unsigned int ata_scsiop_inq_b0(struct ata_scsi_args *args, u8 *rbuf) * logical than physical sector size we need to figure out what the * latter is. 
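 * ata_id_log2_per_physical_sector() returns log2 of that ratio, so e.g. * a drive with 512-byte logical and 4096-byte physical sectors reports 3 * and we advertise 1 << 3 = 8 logical sectors as the minimum I/O * granularity.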
*/ - if (ata_id_has_large_logical_sectors(args->id)) - min_io_sectors = ata_id_logical_per_physical_sectors(args->id); - else - min_io_sectors = 1; + min_io_sectors = 1 << ata_id_log2_per_physical_sector(args->id); put_unaligned_be16(min_io_sectors, &rbuf[6]); /* @@ -2172,6 +2141,16 @@ static unsigned int ata_scsiop_inq_b1(struct ata_scsi_args *args, u8 *rbuf) return 0; } +static unsigned int ata_scsiop_inq_b2(struct ata_scsi_args *args, u8 *rbuf) +{ + /* SCSI Thin Provisioning VPD page: SBC-3 rev 22 or later */ + rbuf[1] = 0xb2; + rbuf[3] = 0x4; + rbuf[5] = 1 << 6; /* TPWS */ + + return 0; +} + /** * ata_scsiop_noop - Command handler that simply returns success. * @args: device IDENTIFY data / SCSI command of interest. @@ -2397,21 +2376,13 @@ static unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf) { struct ata_device *dev = args->dev; u64 last_lba = dev->n_sectors - 1; /* LBA of the last block */ - u8 log_per_phys = 0; - u16 lowest_aligned = 0; - u16 word_106 = dev->id[106]; - u16 word_209 = dev->id[209]; - - if ((word_106 & 0xc000) == 0x4000) { - /* Number and offset of logical sectors per physical sector */ - if (word_106 & (1 << 13)) - log_per_phys = word_106 & 0xf; - if ((word_209 & 0xc000) == 0x4000) { - u16 first = dev->id[209] & 0x3fff; - if (first > 0) - lowest_aligned = (1 << log_per_phys) - first; - } - } + u32 sector_size; /* physical sector size in bytes */ + u8 log2_per_phys; + u16 lowest_aligned; + + sector_size = ata_id_logical_sector_size(dev->id); + log2_per_phys = ata_id_log2_per_physical_sector(dev->id); + lowest_aligned = ata_id_logical_sector_offset(dev->id, log2_per_phys); VPRINTK("ENTER\n"); @@ -2426,8 +2397,10 @@ static unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf) rbuf[3] = last_lba; /* sector size */ - rbuf[6] = ATA_SECT_SIZE >> 8; - rbuf[7] = ATA_SECT_SIZE & 0xff; + rbuf[4] = sector_size >> (8 * 3); + rbuf[5] = sector_size >> (8 * 2); + rbuf[6] = sector_size >> (8 * 1); + rbuf[7] = sector_size; } else { /* sector count, 64-bit */ rbuf[0] = last_lba >> (8 * 7); @@ -2440,11 +2413,13 @@ static unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf) rbuf[7] = last_lba; /* sector size */ - rbuf[10] = ATA_SECT_SIZE >> 8; - rbuf[11] = ATA_SECT_SIZE & 0xff; + rbuf[ 8] = sector_size >> (8 * 3); + rbuf[ 9] = sector_size >> (8 * 2); + rbuf[10] = sector_size >> (8 * 1); + rbuf[11] = sector_size; rbuf[12] = 0; - rbuf[13] = log_per_phys; + rbuf[13] = log2_per_phys; rbuf[14] = (lowest_aligned >> 8) & 0x3f; rbuf[15] = lowest_aligned; @@ -2888,9 +2863,8 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc) tf->device = dev->devno ? tf->device | ATA_DEV1 : tf->device & ~ATA_DEV1; - /* READ/WRITE LONG use a non-standard sect_size */ - qc->sect_size = ATA_SECT_SIZE; switch (tf->command) { + /* READ/WRITE LONG use a non-standard sect_size */ case ATA_CMD_READ_LONG: case ATA_CMD_READ_LONG_ONCE: case ATA_CMD_WRITE_LONG: @@ -2898,6 +2872,45 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc) if (tf->protocol != ATA_PROT_PIO || tf->nsect != 1) goto invalid_fld; qc->sect_size = scsi_bufflen(scmd); + break; + + /* commands using reported Logical Block size (e.g. 
512 or 4K) */ + case ATA_CMD_CFA_WRITE_NE: + case ATA_CMD_CFA_TRANS_SECT: + case ATA_CMD_CFA_WRITE_MULT_NE: + /* XXX: case ATA_CMD_CFA_WRITE_SECTORS_WITHOUT_ERASE: */ + case ATA_CMD_READ: + case ATA_CMD_READ_EXT: + case ATA_CMD_READ_QUEUED: + /* XXX: case ATA_CMD_READ_QUEUED_EXT: */ + case ATA_CMD_FPDMA_READ: + case ATA_CMD_READ_MULTI: + case ATA_CMD_READ_MULTI_EXT: + case ATA_CMD_PIO_READ: + case ATA_CMD_PIO_READ_EXT: + case ATA_CMD_READ_STREAM_DMA_EXT: + case ATA_CMD_READ_STREAM_EXT: + case ATA_CMD_VERIFY: + case ATA_CMD_VERIFY_EXT: + case ATA_CMD_WRITE: + case ATA_CMD_WRITE_EXT: + case ATA_CMD_WRITE_FUA_EXT: + case ATA_CMD_WRITE_QUEUED: + case ATA_CMD_WRITE_QUEUED_FUA_EXT: + case ATA_CMD_FPDMA_WRITE: + case ATA_CMD_WRITE_MULTI: + case ATA_CMD_WRITE_MULTI_EXT: + case ATA_CMD_WRITE_MULTI_FUA_EXT: + case ATA_CMD_PIO_WRITE: + case ATA_CMD_PIO_WRITE_EXT: + case ATA_CMD_WRITE_STREAM_DMA_EXT: + case ATA_CMD_WRITE_STREAM_EXT: + qc->sect_size = scmd->device->sector_size; + break; + + /* Everything else uses 512 byte "sectors" */ + default: + qc->sect_size = ATA_SECT_SIZE; } /* @@ -3250,6 +3263,9 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd, case 0xb1: ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b1); break; + case 0xb2: + ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b2); + break; default: ata_scsi_invalid_field(cmd, done); break; @@ -3334,7 +3350,7 @@ int ata_scsi_add_hosts(struct ata_host *host, struct scsi_host_template *sht) *(struct ata_port **)&shost->hostdata[0] = ap; ap->scsi_host = shost; - shost->transportt = &ata_scsi_transport_template; + shost->transportt = ata_scsi_transport_template; shost->unique_id = ap->print_id; shost->max_id = 16; shost->max_lun = 1; @@ -3393,6 +3409,8 @@ void ata_scsi_scan_host(struct ata_port *ap, int sync) if (!IS_ERR(sdev)) { dev->sdev = sdev; scsi_device_put(sdev); + } else { + dev->sdev = NULL; } } } @@ -3616,8 +3634,8 @@ void ata_scsi_hotplug(struct work_struct *work) * RETURNS: * Zero. 
*/ -static int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel, - unsigned int id, unsigned int lun) +int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel, + unsigned int id, unsigned int lun) { struct ata_port *ap = ata_shost_to_port(shost); unsigned long flags; diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 3b82d8e..14d18bf 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -222,7 +222,7 @@ int ata_sff_busy_sleep(struct ata_port *ap, timeout = ata_deadline(timer_start, tmout_pat); while (status != 0xff && (status & ATA_BUSY) && time_before(jiffies, timeout)) { - msleep(50); + ata_msleep(ap, 50); status = ata_sff_busy_wait(ap, ATA_BUSY, 3); } @@ -234,7 +234,7 @@ int ata_sff_busy_sleep(struct ata_port *ap, timeout = ata_deadline(timer_start, tmout); while (status != 0xff && (status & ATA_BUSY) && time_before(jiffies, timeout)) { - msleep(50); + ata_msleep(ap, 50); status = ap->ops->sff_check_status(ap); } @@ -360,7 +360,7 @@ static void ata_dev_select(struct ata_port *ap, unsigned int device, if (wait) { if (can_sleep && ap->link.device[device].class == ATA_DEV_ATAPI) - msleep(150); + ata_msleep(ap, 150); ata_wait_idle(ap); } } @@ -418,6 +418,7 @@ void ata_sff_tf_load(struct ata_port *ap, const struct ata_taskfile *tf) if (ioaddr->ctl_addr) iowrite8(tf->ctl, ioaddr->ctl_addr); ap->last_ctl = tf->ctl; + ata_wait_idle(ap); } if (is_addr && (tf->flags & ATA_TFLAG_LBA48)) { @@ -453,6 +454,8 @@ void ata_sff_tf_load(struct ata_port *ap, const struct ata_taskfile *tf) iowrite8(tf->device, ioaddr->device_addr); VPRINTK("device 0x%X\n", tf->device); } + + ata_wait_idle(ap); } EXPORT_SYMBOL_GPL(ata_sff_tf_load); @@ -1042,7 +1045,8 @@ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq) int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc, u8 status, int in_wq) { - struct ata_eh_info *ehi = &ap->link.eh_info; + struct ata_link *link = qc->dev->link; + struct ata_eh_info *ehi = &link->eh_info; unsigned long flags = 0; int poll_next; @@ -1298,8 +1302,14 @@ fsm_start: } EXPORT_SYMBOL_GPL(ata_sff_hsm_move); -void ata_sff_queue_pio_task(struct ata_port *ap, unsigned long delay) +void ata_sff_queue_pio_task(struct ata_link *link, unsigned long delay) { + struct ata_port *ap = link->ap; + + WARN_ON((ap->sff_pio_task_link != NULL) && + (ap->sff_pio_task_link != link)); + ap->sff_pio_task_link = link; + /* may fail if ata_sff_flush_pio_task() in progress */ queue_delayed_work(ata_sff_wq, &ap->sff_pio_task, msecs_to_jiffies(delay)); @@ -1321,14 +1331,18 @@ static void ata_sff_pio_task(struct work_struct *work) { struct ata_port *ap = container_of(work, struct ata_port, sff_pio_task.work); + struct ata_link *link = ap->sff_pio_task_link; struct ata_queued_cmd *qc; u8 status; int poll_next; + BUG_ON(ap->sff_pio_task_link == NULL); /* qc can be NULL if timeout occurred */ - qc = ata_qc_from_tag(ap, ap->link.active_tag); - if (!qc) + qc = ata_qc_from_tag(ap, link->active_tag); + if (!qc) { + ap->sff_pio_task_link = NULL; return; + } fsm_start: WARN_ON_ONCE(ap->hsm_task_state == HSM_ST_IDLE); @@ -1342,14 +1356,19 @@ fsm_start: */ status = ata_sff_busy_wait(ap, ATA_BUSY, 5); if (status & ATA_BUSY) { - msleep(2); + ata_msleep(ap, 2); status = ata_sff_busy_wait(ap, ATA_BUSY, 10); if (status & ATA_BUSY) { - ata_sff_queue_pio_task(ap, ATA_SHORT_PAUSE); + ata_sff_queue_pio_task(link, ATA_SHORT_PAUSE); return; } } + /* + * hsm_move() may trigger another command to be processed. + * clean the link beforehand. 
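+ * Otherwise the WARN_ON() in ata_sff_queue_pio_task() could trip when + * the completion path immediately queues PIO work for another link on + * this port.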
+ */ + ap->sff_pio_task_link = NULL; /* move the HSM */ poll_next = ata_sff_hsm_move(ap, qc, status, 1); @@ -1376,6 +1395,7 @@ fsm_start: unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; + struct ata_link *link = qc->dev->link; /* Use polling pio if the LLD doesn't handle * interrupt driven pio and atapi CDB interrupt. @@ -1396,7 +1416,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) ap->hsm_task_state = HSM_ST_LAST; if (qc->tf.flags & ATA_TFLAG_POLLING) - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); break; @@ -1409,7 +1429,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) if (qc->tf.flags & ATA_TFLAG_WRITE) { /* PIO data out protocol */ ap->hsm_task_state = HSM_ST_FIRST; - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); /* always send first data block using the * ata_sff_pio_task() codepath. @@ -1419,7 +1439,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) ap->hsm_task_state = HSM_ST; if (qc->tf.flags & ATA_TFLAG_POLLING) - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); /* if polling, ata_sff_pio_task() handles the * rest. otherwise, interrupt handler takes @@ -1441,7 +1461,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) /* send cdb by polling if no cdb interrupt */ if ((!(qc->dev->flags & ATA_DFLAG_CDB_INTR)) || (qc->tf.flags & ATA_TFLAG_POLLING)) - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); break; default: @@ -1917,7 +1937,7 @@ int ata_sff_wait_after_reset(struct ata_link *link, unsigned int devmask, unsigned int dev1 = devmask & (1 << 1); int rc, ret = 0; - msleep(ATA_WAIT_AFTER_RESET); + ata_msleep(ap, ATA_WAIT_AFTER_RESET); /* always check readiness of the master device */ rc = ata_sff_wait_ready(link, deadline); @@ -1946,7 +1966,7 @@ int ata_sff_wait_after_reset(struct ata_link *link, unsigned int devmask, lbal = ioread8(ioaddr->lbal_addr); if ((nsect == 1) && (lbal == 1)) break; - msleep(50); /* give drive a breather */ + ata_msleep(ap, 50); /* give drive a breather */ } rc = ata_sff_wait_ready(link, deadline); @@ -2734,6 +2754,7 @@ EXPORT_SYMBOL_GPL(ata_bmdma_dumb_qc_prep); unsigned int ata_bmdma_qc_issue(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; + struct ata_link *link = qc->dev->link; /* defer PIO handling to sff_qc_issue */ if (!ata_is_dma(qc->tf.protocol)) @@ -2762,7 +2783,7 @@ unsigned int ata_bmdma_qc_issue(struct ata_queued_cmd *qc) /* send cdb by polling if no cdb interrupt */ if (!(qc->dev->flags & ATA_DFLAG_CDB_INTR)) - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); break; default: @@ -3321,7 +3342,7 @@ int __init ata_sff_init(void) return 0; } -void __exit ata_sff_exit(void) +void ata_sff_exit(void) { destroy_workqueue(ata_sff_wq); } diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c new file mode 100644 index 0000000..ce9dc62 --- /dev/null +++ b/drivers/ata/libata-transport.c @@ -0,0 +1,774 @@ +/* + * Copyright 2008 ioogle, Inc. All rights reserved. + * Released under GPL v2. + * + * Libata transport class. + * + * The ATA transport class contains common code to deal with ATA HBAs, + * an approximated representation of ATA topologies in the driver model, + * and various sysfs attributes to expose these topologies and management + * interfaces to user-space. + * + * There are 3 objects defined in in this class: + * - ata_port + * - ata_link + * - ata_device + * Each port has a link object. 
Each link can have up to two devices for PATA + * and generally one for SATA. + * If there is a SATA Port Multiplier (PMP), 15 additional ata_link + * objects are created. + * + * These objects are created when the ata host is initialized and when a PMP is + * found. They are removed only when the HBA is removed, and cleaned up before + * the error handler runs. + */ + + +#include <linux/kernel.h> +#include <linux/blkdev.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <scsi/scsi_transport.h> +#include <linux/libata.h> +#include <linux/hdreg.h> +#include <linux/uaccess.h> + +#include "libata.h" +#include "libata-transport.h" + +#define ATA_PORT_ATTRS 2 +#define ATA_LINK_ATTRS 3 +#define ATA_DEV_ATTRS 9 + +struct scsi_transport_template; +struct scsi_transport_template *ata_scsi_transport_template; + +struct ata_internal { + struct scsi_transport_template t; + + struct device_attribute private_port_attrs[ATA_PORT_ATTRS]; + struct device_attribute private_link_attrs[ATA_LINK_ATTRS]; + struct device_attribute private_dev_attrs[ATA_DEV_ATTRS]; + + struct transport_container link_attr_cont; + struct transport_container dev_attr_cont; + + /* + * The array of null terminated pointers to attributes + * needed by scsi_sysfs.c + */ + struct device_attribute *link_attrs[ATA_LINK_ATTRS + 1]; + struct device_attribute *port_attrs[ATA_PORT_ATTRS + 1]; + struct device_attribute *dev_attrs[ATA_DEV_ATTRS + 1]; +}; +#define to_ata_internal(tmpl) container_of(tmpl, struct ata_internal, t) + + +#define tdev_to_device(d) \ + container_of((d), struct ata_device, tdev) +#define transport_class_to_dev(dev) \ + tdev_to_device((dev)->parent) + +#define tdev_to_link(d) \ + container_of((d), struct ata_link, tdev) +#define transport_class_to_link(dev) \ + tdev_to_link((dev)->parent) + +#define tdev_to_port(d) \ + container_of((d), struct ata_port, tdev) +#define transport_class_to_port(dev) \ + tdev_to_port((dev)->parent) + + +/* Device objects are always created with link objects */ +static int ata_tdev_add(struct ata_device *dev); +static void ata_tdev_delete(struct ata_device *dev); + + +/* + * Hack to allow attributes of the same name in different objects.
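+ * For example (hypothetically), a link and a device could both expose a + * "state" attribute: the _prefix keeps the C identifiers distinct while + * the sysfs file name stays the same.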
+ */ +#define ATA_DEVICE_ATTR(_prefix,_name,_mode,_show,_store) \ + struct device_attribute device_attr_##_prefix##_##_name = \ + __ATTR(_name,_mode,_show,_store) + +#define ata_bitfield_name_match(title, table) \ +static ssize_t \ +get_ata_##title##_names(u32 table_key, char *buf) \ +{ \ + char *prefix = ""; \ + ssize_t len = 0; \ + int i; \ + \ + for (i = 0; i < ARRAY_SIZE(table); i++) { \ + if (table[i].value & table_key) { \ + len += sprintf(buf + len, "%s%s", \ + prefix, table[i].name); \ + prefix = ", "; \ + } \ + } \ + len += sprintf(buf + len, "\n"); \ + return len; \ +} + +#define ata_bitfield_name_search(title, table) \ +static ssize_t \ +get_ata_##title##_names(u32 table_key, char *buf) \ +{ \ + ssize_t len = 0; \ + int i; \ + \ + for (i = 0; i < ARRAY_SIZE(table); i++) { \ + if (table[i].value == table_key) { \ + len += sprintf(buf + len, "%s", \ + table[i].name); \ + break; \ + } \ + } \ + len += sprintf(buf + len, "\n"); \ + return len; \ +} + +static struct { + u32 value; + char *name; +} ata_class_names[] = { + { ATA_DEV_UNKNOWN, "unknown" }, + { ATA_DEV_ATA, "ata" }, + { ATA_DEV_ATA_UNSUP, "ata" }, + { ATA_DEV_ATAPI, "atapi" }, + { ATA_DEV_ATAPI_UNSUP, "atapi" }, + { ATA_DEV_PMP, "pmp" }, + { ATA_DEV_PMP_UNSUP, "pmp" }, + { ATA_DEV_SEMB, "semb" }, + { ATA_DEV_SEMB_UNSUP, "semb" }, + { ATA_DEV_NONE, "none" } +}; +ata_bitfield_name_search(class, ata_class_names) + + +static struct { + u32 value; + char *name; +} ata_err_names[] = { + { AC_ERR_DEV, "DeviceError" }, + { AC_ERR_HSM, "HostStateMachineError" }, + { AC_ERR_TIMEOUT, "Timeout" }, + { AC_ERR_MEDIA, "MediaError" }, + { AC_ERR_ATA_BUS, "BusError" }, + { AC_ERR_HOST_BUS, "HostBusError" }, + { AC_ERR_SYSTEM, "SystemError" }, + { AC_ERR_INVALID, "InvalidArg" }, + { AC_ERR_OTHER, "Unknown" }, + { AC_ERR_NODEV_HINT, "NoDeviceHint" }, + { AC_ERR_NCQ, "NCQError" } +}; +ata_bitfield_name_match(err, ata_err_names) + +static struct { + u32 value; + char *name; +} ata_xfer_names[] = { + { XFER_UDMA_7, "XFER_UDMA_7" }, + { XFER_UDMA_6, "XFER_UDMA_6" }, + { XFER_UDMA_5, "XFER_UDMA_5" }, + { XFER_UDMA_4, "XFER_UDMA_4" }, + { XFER_UDMA_3, "XFER_UDMA_3" }, + { XFER_UDMA_2, "XFER_UDMA_2" }, + { XFER_UDMA_1, "XFER_UDMA_1" }, + { XFER_UDMA_0, "XFER_UDMA_0" }, + { XFER_MW_DMA_4, "XFER_MW_DMA_4" }, + { XFER_MW_DMA_3, "XFER_MW_DMA_3" }, + { XFER_MW_DMA_2, "XFER_MW_DMA_2" }, + { XFER_MW_DMA_1, "XFER_MW_DMA_1" }, + { XFER_MW_DMA_0, "XFER_MW_DMA_0" }, + { XFER_SW_DMA_2, "XFER_SW_DMA_2" }, + { XFER_SW_DMA_1, "XFER_SW_DMA_1" }, + { XFER_SW_DMA_0, "XFER_SW_DMA_0" }, + { XFER_PIO_6, "XFER_PIO_6" }, + { XFER_PIO_5, "XFER_PIO_5" }, + { XFER_PIO_4, "XFER_PIO_4" }, + { XFER_PIO_3, "XFER_PIO_3" }, + { XFER_PIO_2, "XFER_PIO_2" }, + { XFER_PIO_1, "XFER_PIO_1" }, + { XFER_PIO_0, "XFER_PIO_0" }, + { XFER_PIO_SLOW, "XFER_PIO_SLOW" } +}; +ata_bitfield_name_match(xfer,ata_xfer_names) + +/* + * ATA Port attributes + */ +#define ata_port_show_simple(field, name, format_string, cast) \ +static ssize_t \ +show_ata_port_##name(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct ata_port *ap = transport_class_to_port(dev); \ + \ + return snprintf(buf, 20, format_string, cast ap->field); \ +} + +#define ata_port_simple_attr(field, name, format_string, type) \ + ata_port_show_simple(field, name, format_string, (type)) \ +static DEVICE_ATTR(name, S_IRUGO, show_ata_port_##name, NULL) + +ata_port_simple_attr(nr_pmp_links, nr_pmp_links, "%d\n", int); +ata_port_simple_attr(stats.idle_irq, idle_irq, "%ld\n", unsigned long); + +static 
DECLARE_TRANSPORT_CLASS(ata_port_class, + "ata_port", NULL, NULL, NULL); + +static void ata_tport_release(struct device *dev) +{ + put_device(dev->parent); +} + +/** + * ata_is_port -- check if a struct device represents a ATA port + * @dev: device to check + * + * Returns: + * %1 if the device represents a ATA Port, %0 else + */ +int ata_is_port(const struct device *dev) +{ + return dev->release == ata_tport_release; +} + +static int ata_tport_match(struct attribute_container *cont, + struct device *dev) +{ + if (!ata_is_port(dev)) + return 0; + return &ata_scsi_transport_template->host_attrs.ac == cont; +} + +/** + * ata_tport_delete -- remove ATA PORT + * @port: ATA PORT to remove + * + * Removes the specified ATA PORT. Remove the associated link as well. + */ +void ata_tport_delete(struct ata_port *ap) +{ + struct device *dev = &ap->tdev; + + ata_tlink_delete(&ap->link); + + transport_remove_device(dev); + device_del(dev); + transport_destroy_device(dev); + put_device(dev); +} + +/** ata_tport_add - initialize a transport ATA port structure + * + * @parent: parent device + * @ap: existing ata_port structure + * + * Initialize a ATA port structure for sysfs. It will be added to the device + * tree below the device specified by @parent which could be a PCI device. + * + * Returns %0 on success + */ +int ata_tport_add(struct device *parent, + struct ata_port *ap) +{ + int error; + struct device *dev = &ap->tdev; + + device_initialize(dev); + + dev->parent = get_device(parent); + dev->release = ata_tport_release; + dev_set_name(dev, "ata%d", ap->print_id); + transport_setup_device(dev); + error = device_add(dev); + if (error) { + goto tport_err; + } + + transport_add_device(dev); + transport_configure_device(dev); + + error = ata_tlink_add(&ap->link); + if (error) { + goto tport_link_err; + } + return 0; + + tport_link_err: + transport_remove_device(dev); + device_del(dev); + + tport_err: + transport_destroy_device(dev); + put_device(dev); + return error; +} + + +/* + * ATA link attributes + */ + + +#define ata_link_show_linkspeed(field) \ +static ssize_t \ +show_ata_link_##field(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct ata_link *link = transport_class_to_link(dev); \ + \ + return sprintf(buf,"%s\n", sata_spd_string(fls(link->field))); \ +} + +#define ata_link_linkspeed_attr(field) \ + ata_link_show_linkspeed(field) \ +static DEVICE_ATTR(field, S_IRUGO, show_ata_link_##field, NULL) + +ata_link_linkspeed_attr(hw_sata_spd_limit); +ata_link_linkspeed_attr(sata_spd_limit); +ata_link_linkspeed_attr(sata_spd); + + +static DECLARE_TRANSPORT_CLASS(ata_link_class, + "ata_link", NULL, NULL, NULL); + +static void ata_tlink_release(struct device *dev) +{ + put_device(dev->parent); +} + +/** + * ata_is_link -- check if a struct device represents a ATA link + * @dev: device to check + * + * Returns: + * %1 if the device represents a ATA link, %0 else + */ +int ata_is_link(const struct device *dev) +{ + return dev->release == ata_tlink_release; +} + +static int ata_tlink_match(struct attribute_container *cont, + struct device *dev) +{ + struct ata_internal* i = to_ata_internal(ata_scsi_transport_template); + if (!ata_is_link(dev)) + return 0; + return &i->link_attr_cont.ac == cont; +} + +/** + * ata_tlink_delete -- remove ATA LINK + * @port: ATA LINK to remove + * + * Removes the specified ATA LINK. remove associated ATA device(s) as well. 
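+ * Devices are deleted first so that their sysfs directories are gone + * before the parent link directory is removed.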
+ */ +void ata_tlink_delete(struct ata_link *link) +{ + struct device *dev = &link->tdev; + struct ata_device *ata_dev; + + ata_for_each_dev(ata_dev, link, ALL) { + ata_tdev_delete(ata_dev); + } + + transport_remove_device(dev); + device_del(dev); + transport_destroy_device(dev); + put_device(dev); +} + +/** + * ata_tlink_add -- initialize a transport ATA link structure + * @link: allocated ata_link structure. + * + * Initialize an ATA LINK structure for sysfs. It will be added in the + * device tree below the ATA PORT it belongs to. + * + * Returns %0 on success + */ +int ata_tlink_add(struct ata_link *link) +{ + struct device *dev = &link->tdev; + struct ata_port *ap = link->ap; + struct ata_device *ata_dev; + int error; + + device_initialize(dev); + dev->parent = get_device(&ap->tdev); + dev->release = ata_tlink_release; + if (ata_is_host_link(link)) + dev_set_name(dev, "link%d", ap->print_id); + else + dev_set_name(dev, "link%d.%d", ap->print_id, link->pmp); + + transport_setup_device(dev); + + error = device_add(dev); + if (error) { + goto tlink_err; + } + + transport_add_device(dev); + transport_configure_device(dev); + + ata_for_each_dev(ata_dev, link, ALL) { + error = ata_tdev_add(ata_dev); + if (error) { + goto tlink_dev_err; + } + } + return 0; + tlink_dev_err: + while (--ata_dev >= link->device) { + ata_tdev_delete(ata_dev); + } + transport_remove_device(dev); + device_del(dev); + tlink_err: + transport_destroy_device(dev); + put_device(dev); + return error; +} + +/* + * ATA device attributes + */ + +#define ata_dev_show_class(title, field) \ +static ssize_t \ +show_ata_dev_##field(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct ata_device *ata_dev = transport_class_to_dev(dev); \ + \ + return get_ata_##title##_names(ata_dev->field, buf); \ +} + +#define ata_dev_attr(title, field) \ + ata_dev_show_class(title, field) \ +static DEVICE_ATTR(field, S_IRUGO, show_ata_dev_##field, NULL) + +ata_dev_attr(class, class); +ata_dev_attr(xfer, pio_mode); +ata_dev_attr(xfer, dma_mode); +ata_dev_attr(xfer, xfer_mode); + + +#define ata_dev_show_simple(field, format_string, cast) \ +static ssize_t \ +show_ata_dev_##field(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct ata_device *ata_dev = transport_class_to_dev(dev); \ + \ + return snprintf(buf, 20, format_string, cast ata_dev->field); \ +} + +#define ata_dev_simple_attr(field, format_string, type) \ + ata_dev_show_simple(field, format_string, (type)) \ +static DEVICE_ATTR(field, S_IRUGO, \ + show_ata_dev_##field, NULL) + +ata_dev_simple_attr(spdn_cnt, "%d\n", int); + +struct ata_show_ering_arg { + char* buf; + int written; +}; + +static int ata_show_ering(struct ata_ering_entry *ent, void *void_arg) +{ + struct ata_show_ering_arg* arg = void_arg; + struct timespec time; + + jiffies_to_timespec(ent->timestamp,&time); + arg->written += sprintf(arg->buf + arg->written, + "[%5lu.%06lu]", + time.tv_sec, time.tv_nsec); + arg->written += get_ata_err_names(ent->err_mask, + arg->buf + arg->written); + return 0; +} + +static ssize_t +show_ata_dev_ering(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ata_device *ata_dev = transport_class_to_dev(dev); + struct ata_show_ering_arg arg = { buf, 0 }; + + ata_ering_map(&ata_dev->ering, ata_show_ering, &arg); + return arg.written; +} + + +static DEVICE_ATTR(ering, S_IRUGO, show_ata_dev_ering, NULL); + +static ssize_t +show_ata_dev_id(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct 
ata_device *ata_dev = transport_class_to_dev(dev); + int written = 0, i = 0; + + if (ata_dev->class == ATA_DEV_PMP) + return 0; + for (i = 0; i < ATA_ID_WORDS; i++) { + written += snprintf(buf+written, 20, "%04x%c", + ata_dev->id[i], + ((i+1) & 7) ? ' ' : '\n'); + } + return written; +} + +static DEVICE_ATTR(id, S_IRUGO, show_ata_dev_id, NULL); + +static ssize_t +show_ata_dev_gscr(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ata_device *ata_dev = transport_class_to_dev(dev); + int written = 0, i = 0; + + if (ata_dev->class != ATA_DEV_PMP) + return 0; + for (i = 0; i < SATA_PMP_GSCR_DWORDS; i++) { + written += snprintf(buf+written, 20, "%08x%c", + ata_dev->gscr[i], + ((i+1) & 3) ? ' ' : '\n'); + } + if (SATA_PMP_GSCR_DWORDS & 3) + buf[written-1] = '\n'; + return written; +} + +static DEVICE_ATTR(gscr, S_IRUGO, show_ata_dev_gscr, NULL); + +static DECLARE_TRANSPORT_CLASS(ata_dev_class, + "ata_device", NULL, NULL, NULL); + +static void ata_tdev_release(struct device *dev) +{ + put_device(dev->parent); +} + +/** + * ata_is_ata_dev -- check if a struct device represents an ATA device + * @dev: device to check + * + * Returns: + * %1 if the device represents an ATA device, %0 else + */ +int ata_is_ata_dev(const struct device *dev) +{ + return dev->release == ata_tdev_release; +} + +static int ata_tdev_match(struct attribute_container *cont, + struct device *dev) +{ + struct ata_internal* i = to_ata_internal(ata_scsi_transport_template); + if (!ata_is_ata_dev(dev)) + return 0; + return &i->dev_attr_cont.ac == cont; +} + +/** + * ata_tdev_free -- free an ATA transport device + * @dev: ATA device to free + * + * Frees the specified ATA device. + * + * Note: + * This function must only be called on a device that has not + * successfully been added using ata_tdev_add(). + */ +static void ata_tdev_free(struct ata_device *dev) +{ + transport_destroy_device(&dev->tdev); + put_device(&dev->tdev); +} + +/** + * ata_tdev_delete -- remove an ATA device + * @ata_dev: ATA device to remove + * + * Removes the specified ATA device. + */ +static void ata_tdev_delete(struct ata_device *ata_dev) +{ + struct device *dev = &ata_dev->tdev; + + transport_remove_device(dev); + device_del(dev); + ata_tdev_free(ata_dev); +} + + +/** + * ata_tdev_add -- initialize a transport ATA device structure. + * @ata_dev: ata_dev structure. + * + * Initialize an ATA device structure for sysfs. It will be added to the + * device tree below the ATA LINK device it belongs to.
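+ * Devices on the host link are named devX.Z; devices behind a PMP are + * named devX.Y.0, since each PMP link carries a single device.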
+ * + * Returns %0 on success + */ +static int ata_tdev_add(struct ata_device *ata_dev) +{ + struct device *dev = &ata_dev->tdev; + struct ata_link *link = ata_dev->link; + struct ata_port *ap = link->ap; + int error; + + device_initialize(dev); + dev->parent = get_device(&link->tdev); + dev->release = ata_tdev_release; + if (ata_is_host_link(link)) + dev_set_name(dev, "dev%d.%d", ap->print_id,ata_dev->devno); + else + dev_set_name(dev, "dev%d.%d.0", ap->print_id, link->pmp); + + transport_setup_device(dev); + error = device_add(dev); + if (error) { + ata_tdev_free(ata_dev); + return error; + } + + transport_add_device(dev); + transport_configure_device(dev); + return 0; +} + + +/* + * Setup / Teardown code + */ + +#define SETUP_TEMPLATE(attrb, field, perm, test) \ + i->private_##attrb[count] = dev_attr_##field; \ + i->private_##attrb[count].attr.mode = perm; \ + i->attrb[count] = &i->private_##attrb[count]; \ + if (test) \ + count++ + +#define SETUP_LINK_ATTRIBUTE(field) \ + SETUP_TEMPLATE(link_attrs, field, S_IRUGO, 1) + +#define SETUP_PORT_ATTRIBUTE(field) \ + SETUP_TEMPLATE(port_attrs, field, S_IRUGO, 1) + +#define SETUP_DEV_ATTRIBUTE(field) \ + SETUP_TEMPLATE(dev_attrs, field, S_IRUGO, 1) + +/** + * ata_attach_transport -- instantiate ATA transport template + */ +struct scsi_transport_template *ata_attach_transport(void) +{ + struct ata_internal *i; + int count; + + i = kzalloc(sizeof(struct ata_internal), GFP_KERNEL); + if (!i) + return NULL; + + i->t.eh_strategy_handler = ata_scsi_error; + i->t.eh_timed_out = ata_scsi_timed_out; + i->t.user_scan = ata_scsi_user_scan; + + i->t.host_attrs.ac.attrs = &i->port_attrs[0]; + i->t.host_attrs.ac.class = &ata_port_class.class; + i->t.host_attrs.ac.match = ata_tport_match; + transport_container_register(&i->t.host_attrs); + + i->link_attr_cont.ac.class = &ata_link_class.class; + i->link_attr_cont.ac.attrs = &i->link_attrs[0]; + i->link_attr_cont.ac.match = ata_tlink_match; + transport_container_register(&i->link_attr_cont); + + i->dev_attr_cont.ac.class = &ata_dev_class.class; + i->dev_attr_cont.ac.attrs = &i->dev_attrs[0]; + i->dev_attr_cont.ac.match = ata_tdev_match; + transport_container_register(&i->dev_attr_cont); + + count = 0; + SETUP_PORT_ATTRIBUTE(nr_pmp_links); + SETUP_PORT_ATTRIBUTE(idle_irq); + BUG_ON(count > ATA_PORT_ATTRS); + i->port_attrs[count] = NULL; + + count = 0; + SETUP_LINK_ATTRIBUTE(hw_sata_spd_limit); + SETUP_LINK_ATTRIBUTE(sata_spd_limit); + SETUP_LINK_ATTRIBUTE(sata_spd); + BUG_ON(count > ATA_LINK_ATTRS); + i->link_attrs[count] = NULL; + + count = 0; + SETUP_DEV_ATTRIBUTE(class); + SETUP_DEV_ATTRIBUTE(pio_mode); + SETUP_DEV_ATTRIBUTE(dma_mode); + SETUP_DEV_ATTRIBUTE(xfer_mode); + SETUP_DEV_ATTRIBUTE(spdn_cnt); + SETUP_DEV_ATTRIBUTE(ering); + SETUP_DEV_ATTRIBUTE(id); + SETUP_DEV_ATTRIBUTE(gscr); + BUG_ON(count > ATA_DEV_ATTRS); + i->dev_attrs[count] = NULL; + + return &i->t; +} + +/** + * ata_release_transport -- release ATA transport template instance + * @t: transport template instance + */ +void ata_release_transport(struct scsi_transport_template *t) +{ + struct ata_internal *i = to_ata_internal(t); + + transport_container_unregister(&i->t.host_attrs); + transport_container_unregister(&i->link_attr_cont); + transport_container_unregister(&i->dev_attr_cont); + + kfree(i); +} + +__init int libata_transport_init(void) +{ + int error; + + error = transport_class_register(&ata_link_class); + if (error) + goto out_unregister_transport; + error = transport_class_register(&ata_port_class); + if (error) + goto 
out_unregister_link; + error = transport_class_register(&ata_dev_class); + if (error) + goto out_unregister_port; + return 0; + + out_unregister_port: + transport_class_unregister(&ata_port_class); + out_unregister_link: + transport_class_unregister(&ata_link_class); + out_unregister_transport: + return error; + +} + +void __exit libata_transport_exit(void) +{ + transport_class_unregister(&ata_link_class); + transport_class_unregister(&ata_port_class); + transport_class_unregister(&ata_dev_class); +} diff --git a/drivers/ata/libata-transport.h b/drivers/ata/libata-transport.h new file mode 100644 index 0000000..2820cf8 --- /dev/null +++ b/drivers/ata/libata-transport.h @@ -0,0 +1,18 @@ +#ifndef _LIBATA_TRANSPORT_H +#define _LIBATA_TRANSPORT_H + + +extern struct scsi_transport_template *ata_scsi_transport_template; + +int ata_tlink_add(struct ata_link *link); +void ata_tlink_delete(struct ata_link *link); + +int ata_tport_add(struct device *parent, struct ata_port *ap); +void ata_tport_delete(struct ata_port *ap); + +struct scsi_transport_template *ata_attach_transport(void); +void ata_release_transport(struct scsi_transport_template *t); + +__init int libata_transport_init(void); +void __exit libata_transport_exit(void); +#endif diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index 9ce1ecc..a9be110 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -86,6 +86,8 @@ extern int ata_dev_revalidate(struct ata_device *dev, unsigned int new_class, extern int ata_dev_configure(struct ata_device *dev); extern int sata_down_spd_limit(struct ata_link *link, u32 spd_limit); extern int ata_down_xfermask_limit(struct ata_device *dev, unsigned int sel); +extern unsigned int ata_dev_set_feature(struct ata_device *dev, + u8 enable, u8 feature); extern void ata_sg_clean(struct ata_queued_cmd *qc); extern void ata_qc_free(struct ata_queued_cmd *qc); extern void ata_qc_issue(struct ata_queued_cmd *qc); @@ -100,8 +102,7 @@ extern int sata_link_init_spd(struct ata_link *link); extern int ata_task_ioctl(struct scsi_device *scsidev, void __user *arg); extern int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg); extern struct ata_port *ata_port_alloc(struct ata_host *host); -extern void ata_dev_enable_pm(struct ata_device *dev, enum link_pm policy); -extern void ata_lpm_schedule(struct ata_port *ap, enum link_pm); +extern const char *sata_spd_string(unsigned int spd); /* libata-acpi.c */ #ifdef CONFIG_ATA_ACPI @@ -137,10 +138,15 @@ extern void ata_scsi_hotplug(struct work_struct *work); extern void ata_schedule_scsi_eh(struct Scsi_Host *shost); extern void ata_scsi_dev_rescan(struct work_struct *work); extern int ata_bus_probe(struct ata_port *ap); +extern int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel, + unsigned int id, unsigned int lun); + /* libata-eh.c */ extern unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd); extern void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd); +extern void ata_eh_acquire(struct ata_port *ap); +extern void ata_eh_release(struct ata_port *ap); extern enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd); extern void ata_scsi_error(struct Scsi_Host *host); extern void ata_port_wait_eh(struct ata_port *ap); @@ -164,11 +170,16 @@ extern int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, ata_postreset_fn_t postreset, struct ata_link **r_failed_disk); extern void ata_eh_finish(struct ata_port *ap); +extern int ata_ering_map(struct ata_ering *ering, + int (*map_fn)(struct 
ata_ering_entry *, void *), + void *arg); /* libata-pmp.c */ #ifdef CONFIG_SATA_PMP extern int sata_pmp_scr_read(struct ata_link *link, int reg, u32 *val); extern int sata_pmp_scr_write(struct ata_link *link, int reg, u32 val); +extern int sata_pmp_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, + unsigned hints); extern int sata_pmp_attach(struct ata_device *dev); #else /* CONFIG_SATA_PMP */ static inline int sata_pmp_scr_read(struct ata_link *link, int reg, u32 *val) @@ -181,6 +192,12 @@ static inline int sata_pmp_scr_write(struct ata_link *link, int reg, u32 val) return -EINVAL; } +static inline int sata_pmp_set_lpm(struct ata_link *link, + enum ata_lpm_policy policy, unsigned hints) +{ + return -EINVAL; +} + static inline int sata_pmp_attach(struct ata_device *dev) { return -EINVAL; diff --git a/drivers/ata/pata_artop.c b/drivers/ata/pata_artop.c index ba43f0f..2215632 100644 --- a/drivers/ata/pata_artop.c +++ b/drivers/ata/pata_artop.c @@ -74,7 +74,8 @@ static int artop6260_pre_reset(struct ata_link *link, unsigned long deadline) struct pci_dev *pdev = to_pci_dev(ap->host->dev); /* Odd numbered device ids are the units with enable bits (the -R cards) */ - if (pdev->device % 1 && !pci_test_config_bits(pdev, &artop_enable_bits[ap->port_no])) + if ((pdev->device & 1) && + !pci_test_config_bits(pdev, &artop_enable_bits[ap->port_no])) return -ENOENT; return ata_sff_prereset(link, deadline); diff --git a/drivers/ata/pata_bf54x.c b/drivers/ata/pata_bf54x.c index 9cae65d..e1423cd 100644 --- a/drivers/ata/pata_bf54x.c +++ b/drivers/ata/pata_bf54x.c @@ -1046,7 +1046,7 @@ static void bfin_bus_post_reset(struct ata_port *ap, unsigned int devmask) dev1 = 0; break; } - msleep(50); /* give drive a breather */ + ata_msleep(ap, 50); /* give drive a breather */ } if (dev1) ata_sff_busy_sleep(ap, ATA_TMOUT_BOOT_QUICK, ATA_TMOUT_BOOT); @@ -1087,7 +1087,7 @@ static unsigned int bfin_bus_softreset(struct ata_port *ap, * * Old drivers/ide uses the 2mS rule and then waits for ready */ - msleep(150); + ata_msleep(ap, 150); /* Before we perform post reset processing we want to see if * the bus shows 0xFF because the odd clown forgets the D7 diff --git a/drivers/ata/pata_samsung_cf.c b/drivers/ata/pata_samsung_cf.c index 6f9cfb2..8a51d67 100644 --- a/drivers/ata/pata_samsung_cf.c +++ b/drivers/ata/pata_samsung_cf.c @@ -322,7 +322,7 @@ static int pata_s3c_wait_after_reset(struct ata_link *link, { int rc; - msleep(ATA_WAIT_AFTER_RESET); + ata_msleep(link->ap, ATA_WAIT_AFTER_RESET); /* always check readiness of the master device */ rc = ata_sff_wait_ready(link, deadline); diff --git a/drivers/ata/pata_scc.c b/drivers/ata/pata_scc.c index fe36966..093715c 100644 --- a/drivers/ata/pata_scc.c +++ b/drivers/ata/pata_scc.c @@ -530,7 +530,7 @@ static int scc_wait_after_reset(struct ata_link *link, unsigned int devmask, * * Old drivers/ide uses the 2mS rule and then waits for ready. 
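 * (The sleep below now goes through ata_msleep(), which drops EH * ownership for the duration of the wait so sibling ports sharing the * host are not blocked.)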
*/ - msleep(150); + ata_msleep(ap, 150); /* always check readiness of the master device */ rc = ata_sff_wait_ready(link, deadline); @@ -559,7 +559,7 @@ static int scc_wait_after_reset(struct ata_link *link, unsigned int devmask, lbal = in_be32(ioaddr->lbal_addr); if ((nsect == 1) && (lbal == 1)) break; - msleep(50); /* give drive a breather */ + ata_msleep(ap, 50); /* give drive a breather */ } rc = ata_sff_wait_ready(link, deadline); diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c index 5e65988..ac8d7d9 100644 --- a/drivers/ata/pata_via.c +++ b/drivers/ata/pata_via.c @@ -417,6 +417,8 @@ static void via_tf_load(struct ata_port *ap, const struct ata_taskfile *tf) tf->lbam, tf->lbah); } + + ata_wait_idle(ap); } static int via_port_start(struct ata_port *ap) diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index 7325f77..b0214d0 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -678,7 +678,7 @@ static void sata_fsl_port_stop(struct ata_port *ap) iowrite32(temp, hcr_base + HCONTROL); /* Poll for controller to go offline - should happen immediately */ - ata_wait_register(hcr_base + HSTATUS, ONLINE, ONLINE, 1, 1); + ata_wait_register(ap, hcr_base + HSTATUS, ONLINE, ONLINE, 1, 1); ap->private_data = NULL; dma_free_coherent(dev, SATA_FSL_PORT_PRIV_DMA_SZ, @@ -729,7 +729,8 @@ try_offline_again: iowrite32(temp, hcr_base + HCONTROL); /* Poll for controller to go offline */ - temp = ata_wait_register(hcr_base + HSTATUS, ONLINE, ONLINE, 1, 500); + temp = ata_wait_register(ap, hcr_base + HSTATUS, ONLINE, ONLINE, + 1, 500); if (temp & ONLINE) { ata_port_printk(ap, KERN_ERR, @@ -752,7 +753,7 @@ try_offline_again: /* * PHY reset should remain asserted for atleast 1ms */ - msleep(1); + ata_msleep(ap, 1); /* * Now, bring the host controller online again, this can take time @@ -766,7 +767,7 @@ try_offline_again: temp |= HCONTROL_PMP_ATTACHED; iowrite32(temp, hcr_base + HCONTROL); - temp = ata_wait_register(hcr_base + HSTATUS, ONLINE, 0, 1, 500); + temp = ata_wait_register(ap, hcr_base + HSTATUS, ONLINE, 0, 1, 500); if (!(temp & ONLINE)) { ata_port_printk(ap, KERN_ERR, @@ -784,7 +785,7 @@ try_offline_again: * presence */ - temp = ata_wait_register(hcr_base + HSTATUS, 0xFF, 0, 1, 500); + temp = ata_wait_register(ap, hcr_base + HSTATUS, 0xFF, 0, 1, 500); if ((!(temp & 0x10)) || ata_link_offline(link)) { ata_port_printk(ap, KERN_WARNING, "No Device OR PHYRDY change,Hstatus = 0x%x\n", @@ -797,7 +798,7 @@ try_offline_again: * Wait for the first D2H from device,i.e,signature update notification */ start_jiffies = jiffies; - temp = ata_wait_register(hcr_base + HSTATUS, 0xFF, 0x10, + temp = ata_wait_register(ap, hcr_base + HSTATUS, 0xFF, 0x10, 500, jiffies_to_msecs(deadline - start_jiffies)); if ((temp & 0xFF) != 0x18) { @@ -880,7 +881,7 @@ static int sata_fsl_softreset(struct ata_link *link, unsigned int *class, iowrite32(pmp, CQPMP + hcr_base); iowrite32(1, CQ + hcr_base); - temp = ata_wait_register(CQ + hcr_base, 0x1, 0x1, 1, 5000); + temp = ata_wait_register(ap, CQ + hcr_base, 0x1, 0x1, 1, 5000); if (temp & 0x1) { ata_port_printk(ap, KERN_WARNING, "ATA_SRST issue failed\n"); @@ -896,7 +897,7 @@ static int sata_fsl_softreset(struct ata_link *link, unsigned int *class, goto err; } - msleep(1); + ata_msleep(ap, 1); /* * SATA device enters reset state after receving a Control register @@ -915,7 +916,7 @@ static int sata_fsl_softreset(struct ata_link *link, unsigned int *class, if (pmp != SATA_PMP_CTRL_PORT) iowrite32(pmp, CQPMP + hcr_base); iowrite32(1, CQ + hcr_base); - 
msleep(150); /* ?? */ + ata_msleep(ap, 150); /* ?? */ /* * The above command would have signalled an interrupt on command @@ -1137,17 +1138,13 @@ static void sata_fsl_host_intr(struct ata_port *ap) ioread32(hcr_base + CE)); for (i = 0; i < SATA_FSL_QUEUE_DEPTH; i++) { - if (done_mask & (1 << i)) { - qc = ata_qc_from_tag(ap, i); - if (qc) { - ata_qc_complete(qc); - } + if (done_mask & (1 << i)) DPRINTK ("completing ncq cmd,tag=%d,CC=0x%x,CA=0x%x\n", i, ioread32(hcr_base + CC), ioread32(hcr_base + CA)); - } } + ata_qc_complete_multiple(ap, ap->qc_active ^ done_mask); return; } else if ((ap->qc_active & (1 << ATA_TAG_INTERNAL))) { diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c index a36149e..83a4447 100644 --- a/drivers/ata/sata_inic162x.c +++ b/drivers/ata/sata_inic162x.c @@ -614,7 +614,7 @@ static int inic_hardreset(struct ata_link *link, unsigned int *class, writew(IDMA_CTL_RST_ATA, idma_ctl); readw(idma_ctl); /* flush */ - msleep(1); + ata_msleep(ap, 1); writew(0, idma_ctl); rc = sata_link_resume(link, timing, deadline); diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index 8198259..bf74a36 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -2284,7 +2284,7 @@ static unsigned int mv_qc_issue_fis(struct ata_queued_cmd *qc) } if (qc->tf.flags & ATA_TFLAG_POLLING) - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); return 0; } @@ -2743,18 +2743,11 @@ static void mv_err_intr(struct ata_port *ap) } } -static void mv_process_crpb_response(struct ata_port *ap, +static bool mv_process_crpb_response(struct ata_port *ap, struct mv_crpb *response, unsigned int tag, int ncq_enabled) { u8 ata_status; u16 edma_status = le16_to_cpu(response->flags); - struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag); - - if (unlikely(!qc)) { - ata_port_printk(ap, KERN_ERR, "%s: no qc for tag=%d\n", - __func__, tag); - return; - } /* * edma_status from a response queue entry: @@ -2768,13 +2761,14 @@ static void mv_process_crpb_response(struct ata_port *ap, * Error will be seen/handled by * mv_err_intr(). So do nothing at all here. 
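 * Returning false leaves this tag out of done_mask, so the caller's * ata_qc_complete_multiple() will not complete it.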
*/ - return; + return false; } } ata_status = edma_status >> CRPB_FLAG_STATUS_SHIFT; if (!ac_err_mask(ata_status)) - ata_qc_complete(qc); + return true; /* else: leave it for mv_err_intr() */ + return false; } static void mv_process_crpb_entries(struct ata_port *ap, struct mv_port_priv *pp) @@ -2783,6 +2777,7 @@ static void mv_process_crpb_entries(struct ata_port *ap, struct mv_port_priv *pp struct mv_host_priv *hpriv = ap->host->private_data; u32 in_index; bool work_done = false; + u32 done_mask = 0; int ncq_enabled = (pp->pp_flags & MV_PP_FLAG_NCQ_EN); /* Get the hardware queue position index */ @@ -2803,15 +2798,19 @@ static void mv_process_crpb_entries(struct ata_port *ap, struct mv_port_priv *pp /* Gen II/IIE: get command tag from CRPB entry */ tag = le16_to_cpu(response->id) & 0x1f; } - mv_process_crpb_response(ap, response, tag, ncq_enabled); + if (mv_process_crpb_response(ap, response, tag, ncq_enabled)) + done_mask |= 1 << tag; work_done = true; } - /* Update the software queue position index in hardware */ - if (work_done) + if (work_done) { + ata_qc_complete_multiple(ap, ap->qc_active ^ done_mask); + + /* Update the software queue position index in hardware */ writelfl((pp->crpb_dma & EDMA_RSP_Q_BASE_LO_MASK) | (pp->resp_idx << EDMA_RSP_Q_PTR_SHIFT), port_mmio + EDMA_RSP_Q_OUT_PTR); + } } static void mv_port_intr(struct ata_port *ap, u32 port_cause) diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index cb89ef8..7254e25 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -873,29 +873,11 @@ static int nv_adma_check_cpb(struct ata_port *ap, int cpb_num, int force_err) ata_port_freeze(ap); else ata_port_abort(ap); - return 1; + return -1; } - if (likely(flags & NV_CPB_RESP_DONE)) { - struct ata_queued_cmd *qc = ata_qc_from_tag(ap, cpb_num); - VPRINTK("CPB flags done, flags=0x%x\n", flags); - if (likely(qc)) { - DPRINTK("Completing qc from tag %d\n", cpb_num); - ata_qc_complete(qc); - } else { - struct ata_eh_info *ehi = &ap->link.eh_info; - /* Notifier bits set without a command may indicate the drive - is misbehaving. Raise host state machine violation on this - condition. 
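The sata_fsl, sata_mv and sata_nv hunks converge on one completion idiom: gather a bitmask of finished tags and hand it to ata_qc_complete_multiple(), which retires the commands in tag order and performs the ata_qc_from_tag() lookups and stale-tag sanity checks each driver used to open-code. A minimal sketch, assuming a hypothetical DONE_REG with one "done" bit per tag:

#include <linux/libata.h>

#define DONE_REG	0x08	/* hypothetical: bit N set when tag N completed */

static void my_host_intr(struct ata_port *ap, void __iomem *mmio)
{
	u32 done_mask = readl(mmio + DONE_REG);

	/*
	 * The second argument is the *new* qc_active mask, i.e. the commands
	 * still in flight; everything that dropped out of ap->qc_active is
	 * completed by the core.
	 */
	ata_qc_complete_multiple(ap, ap->qc_active ^ done_mask);
}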
*/ - ata_port_printk(ap, KERN_ERR, - "notifier for tag %d with no cmd?\n", - cpb_num); - ehi->err_mask |= AC_ERR_HSM; - ehi->action |= ATA_EH_RESET; - ata_port_freeze(ap); - return 1; - } - } + if (likely(flags & NV_CPB_RESP_DONE)) + return 1; return 0; } @@ -1018,6 +1000,7 @@ static irqreturn_t nv_adma_interrupt(int irq, void *dev_instance) NV_ADMA_STAT_CPBERR | NV_ADMA_STAT_CMD_COMPLETE)) { u32 check_commands = notifier_clears[i]; + u32 done_mask = 0; int pos, rc; if (status & NV_ADMA_STAT_CPBERR) { @@ -1034,10 +1017,13 @@ static irqreturn_t nv_adma_interrupt(int irq, void *dev_instance) pos--; rc = nv_adma_check_cpb(ap, pos, notifier_error & (1 << pos)); - if (unlikely(rc)) + if (rc > 0) + done_mask |= 1 << pos; + else if (unlikely(rc < 0)) check_commands = 0; check_commands &= ~(1 << pos); } + ata_qc_complete_multiple(ap, ap->qc_active ^ done_mask); } } @@ -2132,7 +2118,6 @@ static int nv_swncq_sdbfis(struct ata_port *ap) struct ata_eh_info *ehi = &ap->link.eh_info; u32 sactive; u32 done_mask; - int i; u8 host_stat; u8 lack_dhfis = 0; @@ -2152,27 +2137,11 @@ static int nv_swncq_sdbfis(struct ata_port *ap) sactive = readl(pp->sactive_block); done_mask = pp->qc_active ^ sactive; - if (unlikely(done_mask & sactive)) { - ata_ehi_clear_desc(ehi); - ata_ehi_push_desc(ehi, "illegal SWNCQ:qc_active transition" - "(%08x->%08x)", pp->qc_active, sactive); - ehi->err_mask |= AC_ERR_HSM; - ehi->action |= ATA_EH_RESET; - return -EINVAL; - } - for (i = 0; i < ATA_MAX_QUEUE; i++) { - if (!(done_mask & (1 << i))) - continue; - - qc = ata_qc_from_tag(ap, i); - if (qc) { - ata_qc_complete(qc); - pp->qc_active &= ~(1 << i); - pp->dhfis_bits &= ~(1 << i); - pp->dmafis_bits &= ~(1 << i); - pp->sdbfis_bits |= (1 << i); - } - } + pp->qc_active &= ~done_mask; + pp->dhfis_bits &= ~done_mask; + pp->dmafis_bits &= ~done_mask; + pp->sdbfis_bits |= done_mask; + ata_qc_complete_multiple(ap, ap->qc_active ^ done_mask); if (!ap->qc_active) { DPRINTK("over\n"); diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c index be7726d..af41c6f 100644 --- a/drivers/ata/sata_sil24.c +++ b/drivers/ata/sata_sil24.c @@ -589,9 +589,9 @@ static int sil24_init_port(struct ata_port *ap) sil24_clear_pmp(ap); writel(PORT_CS_INIT, port + PORT_CTRL_STAT); - ata_wait_register(port + PORT_CTRL_STAT, + ata_wait_register(ap, port + PORT_CTRL_STAT, PORT_CS_INIT, PORT_CS_INIT, 10, 100); - tmp = ata_wait_register(port + PORT_CTRL_STAT, + tmp = ata_wait_register(ap, port + PORT_CTRL_STAT, PORT_CS_RDY, 0, 10, 100); if ((tmp & (PORT_CS_INIT | PORT_CS_RDY)) != PORT_CS_RDY) { @@ -631,7 +631,7 @@ static int sil24_exec_polled_cmd(struct ata_port *ap, int pmp, writel((u64)paddr >> 32, port + PORT_CMD_ACTIVATE + 4); irq_mask = (PORT_IRQ_COMPLETE | PORT_IRQ_ERROR) << PORT_IRQ_RAW_SHIFT; - irq_stat = ata_wait_register(port + PORT_IRQ_STAT, irq_mask, 0x0, + irq_stat = ata_wait_register(ap, port + PORT_IRQ_STAT, irq_mask, 0x0, 10, timeout_msec); writel(irq_mask, port + PORT_IRQ_STAT); /* clear IRQs */ @@ -719,9 +719,9 @@ static int sil24_hardreset(struct ata_link *link, unsigned int *class, "state, performing PORT_RST\n"); writel(PORT_CS_PORT_RST, port + PORT_CTRL_STAT); - msleep(10); + ata_msleep(ap, 10); writel(PORT_CS_PORT_RST, port + PORT_CTRL_CLR); - ata_wait_register(port + PORT_CTRL_STAT, PORT_CS_RDY, 0, + ata_wait_register(ap, port + PORT_CTRL_STAT, PORT_CS_RDY, 0, 10, 5000); /* restore port configuration */ @@ -740,7 +740,7 @@ static int sil24_hardreset(struct ata_link *link, unsigned int *class, tout_msec = 5000; writel(PORT_CS_DEV_RST, 
port + PORT_CTRL_STAT); - tmp = ata_wait_register(port + PORT_CTRL_STAT, + tmp = ata_wait_register(ap, port + PORT_CTRL_STAT, PORT_CS_DEV_RST, PORT_CS_DEV_RST, 10, tout_msec); @@ -1253,7 +1253,7 @@ static void sil24_init_controller(struct ata_host *host) tmp = readl(port + PORT_CTRL_STAT); if (tmp & PORT_CS_PORT_RST) { writel(PORT_CS_PORT_RST, port + PORT_CTRL_CLR); - tmp = ata_wait_register(port + PORT_CTRL_STAT, + tmp = ata_wait_register(NULL, port + PORT_CTRL_STAT, PORT_CS_PORT_RST, PORT_CS_PORT_RST, 10, 100); if (tmp & PORT_CS_PORT_RST) diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c index 4730c42..c215899 100644 --- a/drivers/ata/sata_via.c +++ b/drivers/ata/sata_via.c @@ -349,7 +349,7 @@ static int vt6420_prereset(struct ata_link *link, unsigned long deadline) /* wait for phy to become ready, if necessary */ do { - msleep(200); + ata_msleep(link->ap, 200); svia_scr_read(link, SCR_STATUS, &sstatus); if ((sstatus & 0xf) != 1) break; diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 5419a49..276d5a7 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -59,6 +59,7 @@ void device_pm_init(struct device *dev) { dev->power.status = DPM_ON; init_completion(&dev->power.completion); + complete_all(&dev->power.completion); dev->power.wakeup_count = 0; pm_runtime_init(dev); } diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 76f114f..ead8b77 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -114,8 +114,6 @@ static unsigned long int fd_def_df0 = FD_DD_3; /* default for df0 if it does module_param(fd_def_df0, ulong, 0); MODULE_LICENSE("GPL"); -static struct request_queue *floppy_queue; - /* * Macros */ @@ -164,6 +162,7 @@ static volatile int selected = -1; /* currently selected drive */ static int writepending; static int writefromint; static char *raw_buf; +static int fdc_queue; static DEFINE_SPINLOCK(amiflop_lock); @@ -1334,6 +1333,42 @@ static int get_track(int drive, int track) return -1; } +/* + * Round-robin between our available drives, doing one request from each + */ +static struct request *set_next_request(void) +{ + struct request_queue *q; + int cnt = FD_MAX_UNITS; + struct request *rq = NULL; + + /* Find next queue we can dispatch from */ + fdc_queue = fdc_queue + 1; + if (fdc_queue == FD_MAX_UNITS) + fdc_queue = 0; + + for (cnt = FD_MAX_UNITS; cnt > 0; cnt--) { + + if (unit[fdc_queue].type->code == FD_NODRIVE) { + if (++fdc_queue == FD_MAX_UNITS) + fdc_queue = 0; + continue; + } + + q = unit[fdc_queue].gendisk->queue; + if (q) { + rq = blk_fetch_request(q); + if (rq) + break; + } + + if (++fdc_queue == FD_MAX_UNITS) + fdc_queue = 0; + } + + return rq; +} + static void redo_fd_request(void) { struct request *rq; @@ -1345,7 +1380,7 @@ static void redo_fd_request(void) int err; next_req: - rq = blk_fetch_request(floppy_queue); + rq = set_next_request(); if (!rq) { /* Nothing left to do */ return; @@ -1682,6 +1717,13 @@ static int __init fd_probe_drives(void) continue; } unit[drive].gendisk = disk; + + disk->queue = blk_init_queue(do_fd_request, &amiflop_lock); + if (!disk->queue) { + unit[drive].type->code = FD_NODRIVE; + continue; + } + drives++; if ((unit[drive].trackbuf = kmalloc(FLOPPY_MAX_SECTORS * 512, GFP_KERNEL)) == NULL) { printk("no mem for "); @@ -1695,7 +1737,6 @@ static int __init fd_probe_drives(void) disk->fops = &floppy_fops; sprintf(disk->disk_name, "fd%d", drive); disk->private_data = &unit[drive]; - disk->queue = floppy_queue; set_capacity(disk, 880*2); add_disk(disk); } @@ -1743,11 
+1784,6 @@ static int __init amiga_floppy_probe(struct platform_device *pdev) goto out_irq2; } - ret = -ENOMEM; - floppy_queue = blk_init_queue(do_fd_request, &amiflop_lock); - if (!floppy_queue) - goto out_queue; - ret = -ENODEV; if (fd_probe_drives() < 1) /* No usable drives */ goto out_probe; @@ -1791,7 +1827,6 @@ static int __init amiga_floppy_probe(struct platform_device *pdev) return 0; out_probe: - blk_cleanup_queue(floppy_queue); out_queue: free_irq(IRQ_AMIGA_CIAA_TB, NULL); out_irq2: @@ -1810,9 +1845,12 @@ static int __exit amiga_floppy_remove(struct platform_device *pdev) for( i = 0; i < FD_MAX_UNITS; i++) { if (unit[i].type->code != FD_NODRIVE) { + struct request_queue *q = unit[i].gendisk->queue; del_gendisk(unit[i].gendisk); put_disk(unit[i].gendisk); kfree(unit[i].trackbuf); + if (q) + blk_cleanup_queue(q); } } blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); @@ -1820,7 +1858,6 @@ static int __exit amiga_floppy_remove(struct platform_device *pdev) free_irq(IRQ_AMIGA_DSKBLK, NULL); custom.dmacon = DMAF_DISK; /* disable DMA */ amiga_chip_free(raw_buf); - blk_cleanup_queue(floppy_queue); unregister_blkdev(FLOPPY_MAJOR, "fd"); } #endif diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index aceb964..0f4eec4 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -79,8 +79,8 @@ #undef DEBUG -static struct request_queue *floppy_queue; static struct request *fd_request; +static int fdc_queue; /* Disk types: DD, HD, ED */ static struct atari_disk_type { @@ -1391,6 +1391,29 @@ static void setup_req_params( int drive ) ReqTrack, ReqSector, (unsigned long)ReqData )); } +/* + * Round-robin between our available drives, doing one request from each + */ +static struct request *set_next_request(void) +{ + struct request_queue *q; + int old_pos = fdc_queue; + struct request *rq = NULL; + + do { + q = unit[fdc_queue].disk->queue; + if (++fdc_queue == FD_MAX_UNITS) + fdc_queue = 0; + if (q) { + rq = blk_fetch_request(q); + if (rq) + break; + } + } while (fdc_queue != old_pos); + + return rq; +} + static void redo_fd_request(void) { @@ -1405,7 +1428,7 @@ static void redo_fd_request(void) repeat: if (!fd_request) { - fd_request = blk_fetch_request(floppy_queue); + fd_request = set_next_request(); if (!fd_request) goto the_end; } @@ -1932,10 +1955,6 @@ static int __init atari_floppy_init (void) PhysTrackBuffer = virt_to_phys(TrackBuffer); BufferDrive = BufferSide = BufferTrack = -1; - floppy_queue = blk_init_queue(do_fd_request, &ataflop_lock); - if (!floppy_queue) - goto Enomem; - for (i = 0; i < FD_MAX_UNITS; i++) { unit[i].track = -1; unit[i].flags = 0; @@ -1944,7 +1963,10 @@ static int __init atari_floppy_init (void) sprintf(unit[i].disk->disk_name, "fd%d", i); unit[i].disk->fops = &floppy_fops; unit[i].disk->private_data = &unit[i]; - unit[i].disk->queue = floppy_queue; + unit[i].disk->queue = blk_init_queue(do_fd_request, + &ataflop_lock); + if (!unit[i].disk->queue) + goto Enomem; set_capacity(unit[i].disk, MAX_DISK_SIZE * 2); add_disk(unit[i].disk); } @@ -1959,10 +1981,14 @@ static int __init atari_floppy_init (void) return 0; Enomem: - while (i--) + while (i--) { + struct request_queue *q = unit[i].disk->queue; + put_disk(unit[i].disk); - if (floppy_queue) - blk_cleanup_queue(floppy_queue); + if (q) + blk_cleanup_queue(q); + } + unregister_blkdev(FLOPPY_MAJOR, "fd"); return -ENOMEM; } @@ -2011,12 +2037,14 @@ static void __exit atari_floppy_exit(void) int i; blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); for (i = 0; i < FD_MAX_UNITS; i++) { + struct 
request_queue *q = unit[i].disk->queue; + del_gendisk(unit[i].disk); put_disk(unit[i].disk); + blk_cleanup_queue(q); } unregister_blkdev(FLOPPY_MAJOR, "fd"); - blk_cleanup_queue(floppy_queue); del_timer_sync(&fd_timer); atari_stram_free( DMABuffer ); } diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 1c7f637..fa33f97 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -482,7 +482,6 @@ static struct brd_device *brd_alloc(int i) if (!brd->brd_queue) goto out_free_dev; blk_queue_make_request(brd->brd_queue, brd_make_request); - blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_TAG); blk_queue_max_hw_sectors(brd->brd_queue, 1024); blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY); diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 31064df..39d62eb 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -297,6 +297,8 @@ static void enqueue_cmd_and_start_io(ctlr_info_t *h, spin_lock_irqsave(&h->lock, flags); addQ(&h->reqQ, c); h->Qdepth++; + if (h->Qdepth > h->maxQsinceinit) + h->maxQsinceinit = h->Qdepth; start_io(h); spin_unlock_irqrestore(&h->lock, flags); } @@ -1230,470 +1232,452 @@ static void check_ioctl_unit_attention(ctlr_info_t *h, CommandList_struct *c) c->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION) (void)check_for_unit_attention(h, c); } -/* - * ioctl - */ -static int cciss_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) + +static int cciss_getpciinfo(ctlr_info_t *h, void __user *argp) { - struct gendisk *disk = bdev->bd_disk; - ctlr_info_t *h = get_host(disk); - drive_info_struct *drv = get_drv(disk); - void __user *argp = (void __user *)arg; + cciss_pci_info_struct pciinfo; - dev_dbg(&h->pdev->dev, "cciss_ioctl: Called with cmd=%x %lx\n", - cmd, arg); - switch (cmd) { - case CCISS_GETPCIINFO: - { - cciss_pci_info_struct pciinfo; - - if (!arg) - return -EINVAL; - pciinfo.domain = pci_domain_nr(h->pdev->bus); - pciinfo.bus = h->pdev->bus->number; - pciinfo.dev_fn = h->pdev->devfn; - pciinfo.board_id = h->board_id; - if (copy_to_user - (argp, &pciinfo, sizeof(cciss_pci_info_struct))) - return -EFAULT; - return 0; - } - case CCISS_GETINTINFO: - { - cciss_coalint_struct intinfo; - if (!arg) - return -EINVAL; - intinfo.delay = - readl(&h->cfgtable->HostWrite.CoalIntDelay); - intinfo.count = - readl(&h->cfgtable->HostWrite.CoalIntCount); - if (copy_to_user - (argp, &intinfo, sizeof(cciss_coalint_struct))) - return -EFAULT; - return 0; - } - case CCISS_SETINTINFO: - { - cciss_coalint_struct intinfo; - unsigned long flags; - int i; - - if (!arg) - return -EINVAL; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (copy_from_user - (&intinfo, argp, sizeof(cciss_coalint_struct))) - return -EFAULT; - if ((intinfo.delay == 0) && (intinfo.count == 0)) - return -EINVAL; - spin_lock_irqsave(&h->lock, flags); - /* Update the field, and then ring the doorbell */ - writel(intinfo.delay, - &(h->cfgtable->HostWrite.CoalIntDelay)); - writel(intinfo.count, - &(h->cfgtable->HostWrite.CoalIntCount)); - writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL); - - for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) { - if (!(readl(h->vaddr + SA5_DOORBELL) - & CFGTBL_ChangeReq)) - break; - /* delay and try again */ - udelay(1000); - } - spin_unlock_irqrestore(&h->lock, flags); - if (i >= MAX_IOCTL_CONFIG_WAIT) - return -EAGAIN; - return 0; - } - case CCISS_GETNODENAME: - { - NodeName_type NodeName; - int i; - - if (!arg) - return -EINVAL; - for (i = 0; i < 16; i++) - NodeName[i] = - readb(&h->cfgtable->ServerName[i]); - if 
(copy_to_user(argp, NodeName, sizeof(NodeName_type))) - return -EFAULT; - return 0; - } - case CCISS_SETNODENAME: - { - NodeName_type NodeName; - unsigned long flags; - int i; + if (!argp) + return -EINVAL; + pciinfo.domain = pci_domain_nr(h->pdev->bus); + pciinfo.bus = h->pdev->bus->number; + pciinfo.dev_fn = h->pdev->devfn; + pciinfo.board_id = h->board_id; + if (copy_to_user(argp, &pciinfo, sizeof(cciss_pci_info_struct))) + return -EFAULT; + return 0; +} - if (!arg) - return -EINVAL; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; +static int cciss_getintinfo(ctlr_info_t *h, void __user *argp) +{ + cciss_coalint_struct intinfo; - if (copy_from_user - (NodeName, argp, sizeof(NodeName_type))) - return -EFAULT; + if (!argp) + return -EINVAL; + intinfo.delay = readl(&h->cfgtable->HostWrite.CoalIntDelay); + intinfo.count = readl(&h->cfgtable->HostWrite.CoalIntCount); + if (copy_to_user + (argp, &intinfo, sizeof(cciss_coalint_struct))) + return -EFAULT; + return 0; +} - spin_lock_irqsave(&h->lock, flags); +static int cciss_setintinfo(ctlr_info_t *h, void __user *argp) +{ + cciss_coalint_struct intinfo; + unsigned long flags; + int i; - /* Update the field, and then ring the doorbell */ - for (i = 0; i < 16; i++) - writeb(NodeName[i], - &h->cfgtable->ServerName[i]); + if (!argp) + return -EINVAL; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user(&intinfo, argp, sizeof(intinfo))) + return -EFAULT; + if ((intinfo.delay == 0) && (intinfo.count == 0)) + return -EINVAL; + spin_lock_irqsave(&h->lock, flags); + /* Update the field, and then ring the doorbell */ + writel(intinfo.delay, &(h->cfgtable->HostWrite.CoalIntDelay)); + writel(intinfo.count, &(h->cfgtable->HostWrite.CoalIntCount)); + writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL); - writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL); + for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) { + if (!(readl(h->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq)) + break; + udelay(1000); /* delay and try again */ + } + spin_unlock_irqrestore(&h->lock, flags); + if (i >= MAX_IOCTL_CONFIG_WAIT) + return -EAGAIN; + return 0; +} - for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) { - if (!(readl(h->vaddr + SA5_DOORBELL) - & CFGTBL_ChangeReq)) - break; - /* delay and try again */ - udelay(1000); - } - spin_unlock_irqrestore(&h->lock, flags); - if (i >= MAX_IOCTL_CONFIG_WAIT) - return -EAGAIN; - return 0; - } +static int cciss_getnodename(ctlr_info_t *h, void __user *argp) +{ + NodeName_type NodeName; + int i; - case CCISS_GETHEARTBEAT: - { - Heartbeat_type heartbeat; - - if (!arg) - return -EINVAL; - heartbeat = readl(&h->cfgtable->HeartBeat); - if (copy_to_user - (argp, &heartbeat, sizeof(Heartbeat_type))) - return -EFAULT; - return 0; - } - case CCISS_GETBUSTYPES: - { - BusTypes_type BusTypes; - - if (!arg) - return -EINVAL; - BusTypes = readl(&h->cfgtable->BusTypes); - if (copy_to_user - (argp, &BusTypes, sizeof(BusTypes_type))) - return -EFAULT; - return 0; - } - case CCISS_GETFIRMVER: - { - FirmwareVer_type firmware; + if (!argp) + return -EINVAL; + for (i = 0; i < 16; i++) + NodeName[i] = readb(&h->cfgtable->ServerName[i]); + if (copy_to_user(argp, NodeName, sizeof(NodeName_type))) + return -EFAULT; + return 0; +} - if (!arg) - return -EINVAL; - memcpy(firmware, h->firm_ver, 4); +static int cciss_setnodename(ctlr_info_t *h, void __user *argp) +{ + NodeName_type NodeName; + unsigned long flags; + int i; - if (copy_to_user - (argp, firmware, sizeof(FirmwareVer_type))) - return -EFAULT; - return 0; - } - case CCISS_GETDRIVVER: - { - DriverVer_type 
DriverVer = DRIVER_VERSION; + if (!argp) + return -EINVAL; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user(NodeName, argp, sizeof(NodeName_type))) + return -EFAULT; + spin_lock_irqsave(&h->lock, flags); + /* Update the field, and then ring the doorbell */ + for (i = 0; i < 16; i++) + writeb(NodeName[i], &h->cfgtable->ServerName[i]); + writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL); + for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) { + if (!(readl(h->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq)) + break; + udelay(1000); /* delay and try again */ + } + spin_unlock_irqrestore(&h->lock, flags); + if (i >= MAX_IOCTL_CONFIG_WAIT) + return -EAGAIN; + return 0; +} - if (!arg) - return -EINVAL; +static int cciss_getheartbeat(ctlr_info_t *h, void __user *argp) +{ + Heartbeat_type heartbeat; - if (copy_to_user - (argp, &DriverVer, sizeof(DriverVer_type))) - return -EFAULT; - return 0; - } + if (!argp) + return -EINVAL; + heartbeat = readl(&h->cfgtable->HeartBeat); + if (copy_to_user(argp, &heartbeat, sizeof(Heartbeat_type))) + return -EFAULT; + return 0; +} - case CCISS_DEREGDISK: - case CCISS_REGNEWD: - case CCISS_REVALIDVOLS: - return rebuild_lun_table(h, 0, 1); +static int cciss_getbustypes(ctlr_info_t *h, void __user *argp) +{ + BusTypes_type BusTypes; + + if (!argp) + return -EINVAL; + BusTypes = readl(&h->cfgtable->BusTypes); + if (copy_to_user(argp, &BusTypes, sizeof(BusTypes_type))) + return -EFAULT; + return 0; +} - case CCISS_GETLUNINFO:{ - LogvolInfo_struct luninfo; +static int cciss_getfirmver(ctlr_info_t *h, void __user *argp) +{ + FirmwareVer_type firmware; - memcpy(&luninfo.LunID, drv->LunID, - sizeof(luninfo.LunID)); - luninfo.num_opens = drv->usage_count; - luninfo.num_parts = 0; - if (copy_to_user(argp, &luninfo, - sizeof(LogvolInfo_struct))) - return -EFAULT; - return 0; + if (!argp) + return -EINVAL; + memcpy(firmware, h->firm_ver, 4); + + if (copy_to_user + (argp, firmware, sizeof(FirmwareVer_type))) + return -EFAULT; + return 0; +} + +static int cciss_getdrivver(ctlr_info_t *h, void __user *argp) +{ + DriverVer_type DriverVer = DRIVER_VERSION; + + if (!argp) + return -EINVAL; + if (copy_to_user(argp, &DriverVer, sizeof(DriverVer_type))) + return -EFAULT; + return 0; +} + +static int cciss_getluninfo(ctlr_info_t *h, + struct gendisk *disk, void __user *argp) +{ + LogvolInfo_struct luninfo; + drive_info_struct *drv = get_drv(disk); + + if (!argp) + return -EINVAL; + memcpy(&luninfo.LunID, drv->LunID, sizeof(luninfo.LunID)); + luninfo.num_opens = drv->usage_count; + luninfo.num_parts = 0; + if (copy_to_user(argp, &luninfo, sizeof(LogvolInfo_struct))) + return -EFAULT; + return 0; +} + +static int cciss_passthru(ctlr_info_t *h, void __user *argp) +{ + IOCTL_Command_struct iocommand; + CommandList_struct *c; + char *buff = NULL; + u64bit temp64; + DECLARE_COMPLETION_ONSTACK(wait); + + if (!argp) + return -EINVAL; + + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + + if (copy_from_user + (&iocommand, argp, sizeof(IOCTL_Command_struct))) + return -EFAULT; + if ((iocommand.buf_size < 1) && + (iocommand.Request.Type.Direction != XFER_NONE)) { + return -EINVAL; + } + if (iocommand.buf_size > 0) { + buff = kmalloc(iocommand.buf_size, GFP_KERNEL); + if (buff == NULL) + return -EFAULT; + } + if (iocommand.Request.Type.Direction == XFER_WRITE) { + /* Copy the data into the buffer we created */ + if (copy_from_user(buff, iocommand.buf, iocommand.buf_size)) { + kfree(buff); + return -EFAULT; } - case CCISS_PASSTHRU: - { - IOCTL_Command_struct iocommand; - CommandList_struct *c; 
- char *buff = NULL; - u64bit temp64; - DECLARE_COMPLETION_ONSTACK(wait); - - if (!arg) - return -EINVAL; - - if (!capable(CAP_SYS_RAWIO)) - return -EPERM; - - if (copy_from_user - (&iocommand, argp, sizeof(IOCTL_Command_struct))) - return -EFAULT; - if ((iocommand.buf_size < 1) && - (iocommand.Request.Type.Direction != XFER_NONE)) { - return -EINVAL; - } -#if 0 /* 'buf_size' member is 16-bits, and always smaller than kmalloc limit */ - /* Check kmalloc limits */ - if (iocommand.buf_size > 128000) - return -EINVAL; -#endif - if (iocommand.buf_size > 0) { - buff = kmalloc(iocommand.buf_size, GFP_KERNEL); - if (buff == NULL) - return -EFAULT; - } - if (iocommand.Request.Type.Direction == XFER_WRITE) { - /* Copy the data into the buffer we created */ - if (copy_from_user - (buff, iocommand.buf, iocommand.buf_size)) { - kfree(buff); - return -EFAULT; - } - } else { - memset(buff, 0, iocommand.buf_size); - } - c = cmd_special_alloc(h); - if (!c) { - kfree(buff); - return -ENOMEM; - } - /* Fill in the command type */ - c->cmd_type = CMD_IOCTL_PEND; - /* Fill in Command Header */ - c->Header.ReplyQueue = 0; /* unused in simple mode */ - if (iocommand.buf_size > 0) /* buffer to fill */ - { - c->Header.SGList = 1; - c->Header.SGTotal = 1; - } else /* no buffers to fill */ - { - c->Header.SGList = 0; - c->Header.SGTotal = 0; - } - c->Header.LUN = iocommand.LUN_info; - /* use the kernel address the cmd block for tag */ - c->Header.Tag.lower = c->busaddr; - - /* Fill in Request block */ - c->Request = iocommand.Request; - - /* Fill in the scatter gather information */ - if (iocommand.buf_size > 0) { - temp64.val = pci_map_single(h->pdev, buff, - iocommand.buf_size, - PCI_DMA_BIDIRECTIONAL); - c->SG[0].Addr.lower = temp64.val32.lower; - c->SG[0].Addr.upper = temp64.val32.upper; - c->SG[0].Len = iocommand.buf_size; - c->SG[0].Ext = 0; /* we are not chaining */ - } - c->waiting = &wait; + } else { + memset(buff, 0, iocommand.buf_size); + } + c = cmd_special_alloc(h); + if (!c) { + kfree(buff); + return -ENOMEM; + } + /* Fill in the command type */ + c->cmd_type = CMD_IOCTL_PEND; + /* Fill in Command Header */ + c->Header.ReplyQueue = 0; /* unused in simple mode */ + if (iocommand.buf_size > 0) { /* buffer to fill */ + c->Header.SGList = 1; + c->Header.SGTotal = 1; + } else { /* no buffers to fill */ + c->Header.SGList = 0; + c->Header.SGTotal = 0; + } + c->Header.LUN = iocommand.LUN_info; + /* use the kernel address the cmd block for tag */ + c->Header.Tag.lower = c->busaddr; - enqueue_cmd_and_start_io(h, c); - wait_for_completion(&wait); + /* Fill in Request block */ + c->Request = iocommand.Request; - /* unlock the buffers from DMA */ - temp64.val32.lower = c->SG[0].Addr.lower; - temp64.val32.upper = c->SG[0].Addr.upper; - pci_unmap_single(h->pdev, (dma_addr_t) temp64.val, - iocommand.buf_size, - PCI_DMA_BIDIRECTIONAL); + /* Fill in the scatter gather information */ + if (iocommand.buf_size > 0) { + temp64.val = pci_map_single(h->pdev, buff, + iocommand.buf_size, PCI_DMA_BIDIRECTIONAL); + c->SG[0].Addr.lower = temp64.val32.lower; + c->SG[0].Addr.upper = temp64.val32.upper; + c->SG[0].Len = iocommand.buf_size; + c->SG[0].Ext = 0; /* we are not chaining */ + } + c->waiting = &wait; - check_ioctl_unit_attention(h, c); + enqueue_cmd_and_start_io(h, c); + wait_for_completion(&wait); - /* Copy the error information out */ - iocommand.error_info = *(c->err_info); - if (copy_to_user - (argp, &iocommand, sizeof(IOCTL_Command_struct))) { - kfree(buff); - cmd_special_free(h, c); - return -EFAULT; - } + /* unlock 
the buffers from DMA */ + temp64.val32.lower = c->SG[0].Addr.lower; + temp64.val32.upper = c->SG[0].Addr.upper; + pci_unmap_single(h->pdev, (dma_addr_t) temp64.val, iocommand.buf_size, + PCI_DMA_BIDIRECTIONAL); + check_ioctl_unit_attention(h, c); + + /* Copy the error information out */ + iocommand.error_info = *(c->err_info); + if (copy_to_user(argp, &iocommand, sizeof(IOCTL_Command_struct))) { + kfree(buff); + cmd_special_free(h, c); + return -EFAULT; + } - if (iocommand.Request.Type.Direction == XFER_READ) { - /* Copy the data out of the buffer we created */ - if (copy_to_user - (iocommand.buf, buff, iocommand.buf_size)) { - kfree(buff); - cmd_special_free(h, c); - return -EFAULT; - } - } + if (iocommand.Request.Type.Direction == XFER_READ) { + /* Copy the data out of the buffer we created */ + if (copy_to_user(iocommand.buf, buff, iocommand.buf_size)) { kfree(buff); cmd_special_free(h, c); - return 0; + return -EFAULT; } - case CCISS_BIG_PASSTHRU:{ - BIG_IOCTL_Command_struct *ioc; - CommandList_struct *c; - unsigned char **buff = NULL; - int *buff_size = NULL; - u64bit temp64; - BYTE sg_used = 0; - int status = 0; - int i; - DECLARE_COMPLETION_ONSTACK(wait); - __u32 left; - __u32 sz; - BYTE __user *data_ptr; - - if (!arg) - return -EINVAL; - if (!capable(CAP_SYS_RAWIO)) - return -EPERM; - ioc = (BIG_IOCTL_Command_struct *) - kmalloc(sizeof(*ioc), GFP_KERNEL); - if (!ioc) { - status = -ENOMEM; - goto cleanup1; - } - if (copy_from_user(ioc, argp, sizeof(*ioc))) { + } + kfree(buff); + cmd_special_free(h, c); + return 0; +} + +static int cciss_bigpassthru(ctlr_info_t *h, void __user *argp) +{ + BIG_IOCTL_Command_struct *ioc; + CommandList_struct *c; + unsigned char **buff = NULL; + int *buff_size = NULL; + u64bit temp64; + BYTE sg_used = 0; + int status = 0; + int i; + DECLARE_COMPLETION_ONSTACK(wait); + __u32 left; + __u32 sz; + BYTE __user *data_ptr; + + if (!argp) + return -EINVAL; + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + ioc = (BIG_IOCTL_Command_struct *) + kmalloc(sizeof(*ioc), GFP_KERNEL); + if (!ioc) { + status = -ENOMEM; + goto cleanup1; + } + if (copy_from_user(ioc, argp, sizeof(*ioc))) { + status = -EFAULT; + goto cleanup1; + } + if ((ioc->buf_size < 1) && + (ioc->Request.Type.Direction != XFER_NONE)) { + status = -EINVAL; + goto cleanup1; + } + /* Check kmalloc limits using all SGs */ + if (ioc->malloc_size > MAX_KMALLOC_SIZE) { + status = -EINVAL; + goto cleanup1; + } + if (ioc->buf_size > ioc->malloc_size * MAXSGENTRIES) { + status = -EINVAL; + goto cleanup1; + } + buff = kzalloc(MAXSGENTRIES * sizeof(char *), GFP_KERNEL); + if (!buff) { + status = -ENOMEM; + goto cleanup1; + } + buff_size = kmalloc(MAXSGENTRIES * sizeof(int), GFP_KERNEL); + if (!buff_size) { + status = -ENOMEM; + goto cleanup1; + } + left = ioc->buf_size; + data_ptr = ioc->buf; + while (left) { + sz = (left > ioc->malloc_size) ? 
ioc->malloc_size : left; + buff_size[sg_used] = sz; + buff[sg_used] = kmalloc(sz, GFP_KERNEL); + if (buff[sg_used] == NULL) { + status = -ENOMEM; + goto cleanup1; + } + if (ioc->Request.Type.Direction == XFER_WRITE) { + if (copy_from_user(buff[sg_used], data_ptr, sz)) { status = -EFAULT; goto cleanup1; } - if ((ioc->buf_size < 1) && - (ioc->Request.Type.Direction != XFER_NONE)) { - status = -EINVAL; - goto cleanup1; - } - /* Check kmalloc limits using all SGs */ - if (ioc->malloc_size > MAX_KMALLOC_SIZE) { - status = -EINVAL; - goto cleanup1; - } - if (ioc->buf_size > ioc->malloc_size * MAXSGENTRIES) { - status = -EINVAL; - goto cleanup1; - } - buff = - kzalloc(MAXSGENTRIES * sizeof(char *), GFP_KERNEL); - if (!buff) { - status = -ENOMEM; - goto cleanup1; - } - buff_size = kmalloc(MAXSGENTRIES * sizeof(int), - GFP_KERNEL); - if (!buff_size) { - status = -ENOMEM; - goto cleanup1; - } - left = ioc->buf_size; - data_ptr = ioc->buf; - while (left) { - sz = (left > - ioc->malloc_size) ? ioc-> - malloc_size : left; - buff_size[sg_used] = sz; - buff[sg_used] = kmalloc(sz, GFP_KERNEL); - if (buff[sg_used] == NULL) { - status = -ENOMEM; - goto cleanup1; - } - if (ioc->Request.Type.Direction == XFER_WRITE) { - if (copy_from_user - (buff[sg_used], data_ptr, sz)) { - status = -EFAULT; - goto cleanup1; - } - } else { - memset(buff[sg_used], 0, sz); - } - left -= sz; - data_ptr += sz; - sg_used++; - } - c = cmd_special_alloc(h); - if (!c) { - status = -ENOMEM; - goto cleanup1; - } - c->cmd_type = CMD_IOCTL_PEND; - c->Header.ReplyQueue = 0; + } else { + memset(buff[sg_used], 0, sz); + } + left -= sz; + data_ptr += sz; + sg_used++; + } + c = cmd_special_alloc(h); + if (!c) { + status = -ENOMEM; + goto cleanup1; + } + c->cmd_type = CMD_IOCTL_PEND; + c->Header.ReplyQueue = 0; + c->Header.SGList = sg_used; + c->Header.SGTotal = sg_used; + c->Header.LUN = ioc->LUN_info; + c->Header.Tag.lower = c->busaddr; - if (ioc->buf_size > 0) { - c->Header.SGList = sg_used; - c->Header.SGTotal = sg_used; - } else { - c->Header.SGList = 0; - c->Header.SGTotal = 0; - } - c->Header.LUN = ioc->LUN_info; - c->Header.Tag.lower = c->busaddr; - - c->Request = ioc->Request; - if (ioc->buf_size > 0) { - for (i = 0; i < sg_used; i++) { - temp64.val = - pci_map_single(h->pdev, buff[i], - buff_size[i], - PCI_DMA_BIDIRECTIONAL); - c->SG[i].Addr.lower = - temp64.val32.lower; - c->SG[i].Addr.upper = - temp64.val32.upper; - c->SG[i].Len = buff_size[i]; - c->SG[i].Ext = 0; /* we are not chaining */ - } - } - c->waiting = &wait; - enqueue_cmd_and_start_io(h, c); - wait_for_completion(&wait); - /* unlock the buffers from DMA */ - for (i = 0; i < sg_used; i++) { - temp64.val32.lower = c->SG[i].Addr.lower; - temp64.val32.upper = c->SG[i].Addr.upper; - pci_unmap_single(h->pdev, - (dma_addr_t) temp64.val, buff_size[i], - PCI_DMA_BIDIRECTIONAL); - } - check_ioctl_unit_attention(h, c); - /* Copy the error information out */ - ioc->error_info = *(c->err_info); - if (copy_to_user(argp, ioc, sizeof(*ioc))) { + c->Request = ioc->Request; + for (i = 0; i < sg_used; i++) { + temp64.val = pci_map_single(h->pdev, buff[i], buff_size[i], + PCI_DMA_BIDIRECTIONAL); + c->SG[i].Addr.lower = temp64.val32.lower; + c->SG[i].Addr.upper = temp64.val32.upper; + c->SG[i].Len = buff_size[i]; + c->SG[i].Ext = 0; /* we are not chaining */ + } + c->waiting = &wait; + enqueue_cmd_and_start_io(h, c); + wait_for_completion(&wait); + /* unlock the buffers from DMA */ + for (i = 0; i < sg_used; i++) { + temp64.val32.lower = c->SG[i].Addr.lower; + temp64.val32.upper = 
c->SG[i].Addr.upper; + pci_unmap_single(h->pdev, + (dma_addr_t) temp64.val, buff_size[i], + PCI_DMA_BIDIRECTIONAL); + } + check_ioctl_unit_attention(h, c); + /* Copy the error information out */ + ioc->error_info = *(c->err_info); + if (copy_to_user(argp, ioc, sizeof(*ioc))) { + cmd_special_free(h, c); + status = -EFAULT; + goto cleanup1; + } + if (ioc->Request.Type.Direction == XFER_READ) { + /* Copy the data out of the buffer we created */ + BYTE __user *ptr = ioc->buf; + for (i = 0; i < sg_used; i++) { + if (copy_to_user(ptr, buff[i], buff_size[i])) { cmd_special_free(h, c); status = -EFAULT; goto cleanup1; } - if (ioc->Request.Type.Direction == XFER_READ) { - /* Copy the data out of the buffer we created */ - BYTE __user *ptr = ioc->buf; - for (i = 0; i < sg_used; i++) { - if (copy_to_user - (ptr, buff[i], buff_size[i])) { - cmd_special_free(h, c); - status = -EFAULT; - goto cleanup1; - } - ptr += buff_size[i]; - } - } - cmd_special_free(h, c); - status = 0; - cleanup1: - if (buff) { - for (i = 0; i < sg_used; i++) - kfree(buff[i]); - kfree(buff); - } - kfree(buff_size); - kfree(ioc); - return status; + ptr += buff_size[i]; } + } + cmd_special_free(h, c); + status = 0; +cleanup1: + if (buff) { + for (i = 0; i < sg_used; i++) + kfree(buff[i]); + kfree(buff); + } + kfree(buff_size); + kfree(ioc); + return status; +} + +static int cciss_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) +{ + struct gendisk *disk = bdev->bd_disk; + ctlr_info_t *h = get_host(disk); + void __user *argp = (void __user *)arg; + + dev_dbg(&h->pdev->dev, "cciss_ioctl: Called with cmd=%x %lx\n", + cmd, arg); + switch (cmd) { + case CCISS_GETPCIINFO: + return cciss_getpciinfo(h, argp); + case CCISS_GETINTINFO: + return cciss_getintinfo(h, argp); + case CCISS_SETINTINFO: + return cciss_setintinfo(h, argp); + case CCISS_GETNODENAME: + return cciss_getnodename(h, argp); + case CCISS_SETNODENAME: + return cciss_setnodename(h, argp); + case CCISS_GETHEARTBEAT: + return cciss_getheartbeat(h, argp); + case CCISS_GETBUSTYPES: + return cciss_getbustypes(h, argp); + case CCISS_GETFIRMVER: + return cciss_getfirmver(h, argp); + case CCISS_GETDRIVVER: + return cciss_getdrivver(h, argp); + case CCISS_DEREGDISK: + case CCISS_REGNEWD: + case CCISS_REVALIDVOLS: + return rebuild_lun_table(h, 0, 1); + case CCISS_GETLUNINFO: + return cciss_getluninfo(h, disk, argp); + case CCISS_PASSTHRU: + return cciss_passthru(h, argp); + case CCISS_BIG_PASSTHRU: + return cciss_bigpassthru(h, argp); /* scsi_cmd_ioctl handles these, below, though some are not */ /* very meaningful for cciss. SG_IO is the main one people want. */ @@ -4519,6 +4503,12 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev) misc_fw_support = readl(&cfgtable->misc_fw_support); use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET; + /* The doorbell reset seems to cause lockups on some Smart + * Arrays (e.g. P410, P410i, maybe others). Until this is + * fixed or at least isolated, avoid the doorbell reset. 
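The cciss refactoring above is ABI-neutral: each arm of the old monolithic switch becomes a helper with the same copy_to_user()/copy_from_user() behaviour, so existing tools keep working. A userspace sketch exercising one of the preserved commands, CCISS_GETPCIINFO; it assumes the exported linux/cciss_ioctl.h header and an existing /dev/cciss/c0d0 node:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/cciss_ioctl.h>

int main(void)
{
	cciss_pci_info_struct info;
	int fd = open("/dev/cciss/c0d0", O_RDONLY);

	if (fd < 0 || ioctl(fd, CCISS_GETPCIINFO, &info) < 0) {
		perror("CCISS_GETPCIINFO");
		return 1;
	}
	printf("domain %u bus %u devfn %u board 0x%x\n",
	       (unsigned)info.domain, (unsigned)info.bus,
	       (unsigned)info.dev_fn, (unsigned)info.board_id);
	close(fd);
	return 0;
}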
+ */ + use_doorbell = 0; + rc = cciss_controller_hard_reset(pdev, vaddr, use_doorbell); if (rc) goto unmap_cfgtable; @@ -4712,6 +4702,9 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, h->scatter_list = kmalloc(h->max_commands * sizeof(struct scatterlist *), GFP_KERNEL); + if (!h->scatter_list) + goto clean4; + for (k = 0; k < h->nr_cmds; k++) { h->scatter_list[k] = kmalloc(sizeof(struct scatterlist) * h->maxsgentries, @@ -4781,7 +4774,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, clean4: kfree(h->cmd_pool_bits); /* Free up sg elements */ - for (k = 0; k < h->nr_cmds; k++) + for (k-- ; k >= 0; k--) kfree(h->scatter_list[k]); kfree(h->scatter_list); cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 352441b..c2ef476 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -2321,8 +2321,7 @@ static inline void drbd_md_flush(struct drbd_conf *mdev) if (test_bit(MD_NO_BARRIER, &mdev->flags)) return; - r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_KERNEL, NULL, - BLKDEV_IFL_WAIT); + r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_KERNEL, NULL); if (r) { set_bit(MD_NO_BARRIER, &mdev->flags); dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 081522d..ac89684 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -975,7 +975,7 @@ static enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct d if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) { rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL, - NULL, BLKDEV_IFL_WAIT); + NULL); if (rv) { dev_err(DEV, "local disk flush failed with status %d\n", rv); /* would rather check on EOPNOTSUPP, but that is not reliable. @@ -2972,7 +2972,6 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h) * we still need to figure out whether we accept that. */ mdev->p_size = p_size; -#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) if (get_ldev(mdev)) { warn_if_differ_considerably(mdev, "lower level device sizes", p_size, drbd_get_max_capacity(mdev->ldev)); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index cf04c1b..aa42e77 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -258,8 +258,8 @@ static int irqdma_allocated; #include static struct request *current_req; -static struct request_queue *floppy_queue; static void do_fd_request(struct request_queue *q); +static int set_next_request(void); #ifndef fd_get_dma_residue #define fd_get_dma_residue() get_dma_residue(FLOPPY_DMA) @@ -413,6 +413,7 @@ static struct gendisk *disks[N_DRIVE]; static struct block_device *opened_bdev[N_DRIVE]; static DEFINE_MUTEX(open_lock); static struct floppy_raw_cmd *raw_cmd, default_raw_cmd; +static int fdc_queue; /* * This struct defines the different floppy types. 
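The drbd hunks above track a block-core change: blkdev_issue_flush() lost its flags argument and now always waits for the flush to finish, so the BLKDEV_IFL_WAIT flag is gone. A minimal sketch of the new call; my_flush_metadata() is illustrative:

#include <linux/blkdev.h>

static int my_flush_metadata(struct block_device *bdev)
{
	/* third argument is the optional error_sector pointer */
	int err = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);

	if (err)
		pr_err("metadata flush failed: %d\n", err);
	return err;
}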
@@ -890,8 +891,8 @@ static void unlock_fdc(void) del_timer(&fd_timeout); cont = NULL; clear_bit(0, &fdc_busy); - if (current_req || blk_peek_request(floppy_queue)) - do_fd_request(floppy_queue); + if (current_req || set_next_request()) + do_fd_request(current_req->q); spin_unlock_irqrestore(&floppy_lock, flags); wake_up(&fdc_wait); } @@ -2243,8 +2244,8 @@ static void floppy_end_request(struct request *req, int error) * logical buffer */ static void request_done(int uptodate) { - struct request_queue *q = floppy_queue; struct request *req = current_req; + struct request_queue *q; unsigned long flags; int block; char msg[sizeof("request done ") + sizeof(int) * 3]; @@ -2258,6 +2259,8 @@ static void request_done(int uptodate) return; } + q = req->q; + if (uptodate) { /* maintain values for invalidation on geometry * change */ @@ -2811,6 +2814,28 @@ static int make_raw_rw_request(void) return 2; } +/* + * Round-robin between our available drives, doing one request from each + */ +static int set_next_request(void) +{ + struct request_queue *q; + int old_pos = fdc_queue; + + do { + q = disks[fdc_queue]->queue; + if (++fdc_queue == N_DRIVE) + fdc_queue = 0; + if (q) { + current_req = blk_fetch_request(q); + if (current_req) + break; + } + } while (fdc_queue != old_pos); + + return current_req != NULL; +} + static void redo_fd_request(void) { int drive; @@ -2822,17 +2847,17 @@ static void redo_fd_request(void) do_request: if (!current_req) { - struct request *req; + int pending; + + spin_lock_irq(&floppy_lock); + pending = set_next_request(); + spin_unlock_irq(&floppy_lock); - spin_lock_irq(floppy_queue->queue_lock); - req = blk_fetch_request(floppy_queue); - spin_unlock_irq(floppy_queue->queue_lock); - if (!req) { + if (!pending) { do_floppy = NULL; unlock_fdc(); return; } - current_req = req; } drive = (long)current_req->rq_disk->private_data; set_fdc(drive); @@ -4165,6 +4190,13 @@ static int __init floppy_init(void) goto out_put_disk; } + disks[dr]->queue = blk_init_queue(do_fd_request, &floppy_lock); + if (!disks[dr]->queue) { + err = -ENOMEM; + goto out_put_disk; + } + + blk_queue_max_hw_sectors(disks[dr]->queue, 64); disks[dr]->major = FLOPPY_MAJOR; disks[dr]->first_minor = TOMINOR(dr); disks[dr]->fops = &floppy_fops; @@ -4183,13 +4215,6 @@ static int __init floppy_init(void) if (err) goto out_unreg_blkdev; - floppy_queue = blk_init_queue(do_fd_request, &floppy_lock); - if (!floppy_queue) { - err = -ENOMEM; - goto out_unreg_driver; - } - blk_queue_max_hw_sectors(floppy_queue, 64); - blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE, floppy_find, NULL, NULL); @@ -4317,7 +4342,6 @@ static int __init floppy_init(void) /* to be cleaned up... 
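All three floppy drivers in this series follow the same shape as the floppy.c conversion in the hunks above and below: the single shared floppy_queue is replaced by a request queue per gendisk, which is what makes the round-robin set_next_request() helpers possible. A minimal sketch of the per-disk setup; names are illustrative, and error unwinding must still put_disk() and blk_cleanup_queue() as the hunks do:

#include <linux/blkdev.h>
#include <linux/genhd.h>

static int my_add_drive(struct gendisk *disk, request_fn_proc *fn,
			spinlock_t *lock)
{
	disk->queue = blk_init_queue(fn, lock);	/* one queue per disk */
	if (!disk->queue)
		return -ENOMEM;			/* caller must put_disk() */
	add_disk(disk);
	return 0;
}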
*/ disks[drive]->private_data = (void *)(long)drive; - disks[drive]->queue = floppy_queue; disks[drive]->flags |= GENHD_FL_REMOVABLE; disks[drive]->driverfs_dev = &floppy_device[drive].dev; add_disk(disks[drive]); @@ -4333,8 +4357,6 @@ out_flush_work: floppy_release_irq_and_dma(); out_unreg_region: blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); - blk_cleanup_queue(floppy_queue); -out_unreg_driver: platform_driver_unregister(&floppy_driver); out_unreg_blkdev: unregister_blkdev(FLOPPY_MAJOR, "fd"); @@ -4342,6 +4364,8 @@ out_put_disk: while (dr--) { del_timer(&motor_off_timer[dr]); put_disk(disks[dr]); + if (disks[dr]->queue) + blk_cleanup_queue(disks[dr]->queue); } return err; } @@ -4550,11 +4574,11 @@ static void __exit floppy_module_exit(void) platform_device_unregister(&floppy_device[drive]); } put_disk(disks[drive]); + blk_cleanup_queue(disks[drive]->queue); } del_timer_sync(&fd_timeout); del_timer_sync(&fd_timer); - blk_cleanup_queue(floppy_queue); if (atomic_read(&usage_count)) floppy_release_irq_and_dma(); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f3c636d..5a39859 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -74,6 +74,7 @@ #include #include #include +#include #include @@ -477,17 +478,17 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset; if (bio_rw(bio) == WRITE) { - bool barrier = (bio->bi_rw & REQ_HARDBARRIER); struct file *file = lo->lo_backing_file; - if (barrier) { - if (unlikely(!file->f_op->fsync)) { - ret = -EOPNOTSUPP; - goto out; - } + /* REQ_HARDBARRIER is deprecated */ + if (bio->bi_rw & REQ_HARDBARRIER) { + ret = -EOPNOTSUPP; + goto out; + } + if (bio->bi_rw & REQ_FLUSH) { ret = vfs_fsync(file, 0); - if (unlikely(ret)) { + if (unlikely(ret && ret != -EINVAL)) { ret = -EIO; goto out; } @@ -495,9 +496,9 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) ret = lo_send(lo, bio, pos); - if (barrier && !ret) { + if ((bio->bi_rw & REQ_FUA) && !ret) { ret = vfs_fsync(file, 0); - if (unlikely(ret)) + if (unlikely(ret && ret != -EINVAL)) ret = -EIO; } } else @@ -737,6 +738,103 @@ static inline int is_loop_device(struct file *file) return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR; } +/* loop sysfs attributes */ + +static ssize_t loop_attr_show(struct device *dev, char *page, + ssize_t (*callback)(struct loop_device *, char *)) +{ + struct loop_device *l, *lo = NULL; + + mutex_lock(&loop_devices_mutex); + list_for_each_entry(l, &loop_devices, lo_list) + if (disk_to_dev(l->lo_disk) == dev) { + lo = l; + break; + } + mutex_unlock(&loop_devices_mutex); + + return lo ? 
callback(lo, page) : -EIO; +} + +#define LOOP_ATTR_RO(_name) \ +static ssize_t loop_attr_##_name##_show(struct loop_device *, char *); \ +static ssize_t loop_attr_do_show_##_name(struct device *d, \ + struct device_attribute *attr, char *b) \ +{ \ + return loop_attr_show(d, b, loop_attr_##_name##_show); \ +} \ +static struct device_attribute loop_attr_##_name = \ + __ATTR(_name, S_IRUGO, loop_attr_do_show_##_name, NULL); + +static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf) +{ + ssize_t ret; + char *p = NULL; + + mutex_lock(&lo->lo_ctl_mutex); + if (lo->lo_backing_file) + p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1); + mutex_unlock(&lo->lo_ctl_mutex); + + if (IS_ERR_OR_NULL(p)) + ret = PTR_ERR(p); + else { + ret = strlen(p); + memmove(buf, p, ret); + buf[ret++] = '\n'; + buf[ret] = 0; + } + + return ret; +} + +static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf) +{ + return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset); +} + +static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf) +{ + return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit); +} + +static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf) +{ + int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR); + + return sprintf(buf, "%s\n", autoclear ? "1" : "0"); +} + +LOOP_ATTR_RO(backing_file); +LOOP_ATTR_RO(offset); +LOOP_ATTR_RO(sizelimit); +LOOP_ATTR_RO(autoclear); + +static struct attribute *loop_attrs[] = { + &loop_attr_backing_file.attr, + &loop_attr_offset.attr, + &loop_attr_sizelimit.attr, + &loop_attr_autoclear.attr, + NULL, +}; + +static struct attribute_group loop_attribute_group = { + .name = "loop", + .attrs= loop_attrs, +}; + +static int loop_sysfs_init(struct loop_device *lo) +{ + return sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj, + &loop_attribute_group); +} + +static void loop_sysfs_exit(struct loop_device *lo) +{ + sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj, + &loop_attribute_group); +} + static int loop_set_fd(struct loop_device *lo, fmode_t mode, struct block_device *bdev, unsigned int arg) { @@ -832,10 +930,11 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, lo->lo_queue->unplug_fn = loop_unplug; if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) - blk_queue_ordered(lo->lo_queue, QUEUE_ORDERED_DRAIN); + blk_queue_flush(lo->lo_queue, REQ_FLUSH); set_capacity(lo->lo_disk, size); bd_set_size(bdev, size << 9); + loop_sysfs_init(lo); /* let user-space know about the new size */ kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); @@ -854,6 +953,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, return 0; out_clr: + loop_sysfs_exit(lo); lo->lo_thread = NULL; lo->lo_device = NULL; lo->lo_backing_file = NULL; @@ -950,6 +1050,7 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) set_capacity(lo->lo_disk, 0); if (bdev) { bd_set_size(bdev, 0); + loop_sysfs_exit(lo); /* let user-space know about this change */ kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); } diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index b82c5ce..76fa3de 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -974,8 +974,7 @@ static int mg_probe(struct platform_device *plat_dev) host->breq->queuedata = host; /* mflash is random device, thanx for the noop */ - elevator_exit(host->breq->elevator); - err = elevator_init(host->breq, "noop"); + err = elevator_change(host->breq, "noop"); if (err) { 
printk(KERN_ERR "%s:%d (elevator_init) fail\n", __func__, __LINE__); diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c index 2284b4f..87311eb 100644 --- a/drivers/block/osdblk.c +++ b/drivers/block/osdblk.c @@ -310,8 +310,7 @@ static void osdblk_rq_fn(struct request_queue *q) break; /* filter out block requests we don't understand */ - if (rq->cmd_type != REQ_TYPE_FS && - !(rq->cmd_flags & REQ_HARDBARRIER)) { + if (rq->cmd_type != REQ_TYPE_FS) { blk_end_request_all(rq, 0); continue; } @@ -439,7 +438,7 @@ static int osdblk_init_disk(struct osdblk_device *osdev) blk_queue_stack_limits(q, osd_request_queue(osdev->osd)); blk_queue_prep_rq(q, blk_queue_start_tag); - blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH); + blk_queue_flush(q, REQ_FLUSH); disk->queue = q; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index b1cbeb5..0166ea13 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -753,7 +753,6 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * rq->timeout = 60*HZ; rq->cmd_type = REQ_TYPE_BLOCK_PC; - rq->cmd_flags |= REQ_HARDBARRIER; if (cgc->quiet) rq->cmd_flags |= REQ_QUIET; diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index e9da874..4911f9e 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -468,7 +468,7 @@ static int __devinit ps3disk_probe(struct ps3_system_bus_device *_dev) blk_queue_dma_alignment(queue, dev->blk_size-1); blk_queue_logical_block_size(queue, dev->blk_size); - blk_queue_ordered(queue, QUEUE_ORDERED_DRAIN_FLUSH); + blk_queue_flush(queue, REQ_FLUSH); blk_queue_max_segments(queue, -1); blk_queue_max_segment_size(queue, dev->bounce_size); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 2aafafc..1260628 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -128,9 +128,6 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, } } - if (vbr->req->cmd_flags & REQ_HARDBARRIER) - vbr->out_hdr.type |= VIRTIO_BLK_T_BARRIER; - sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr)); /* @@ -388,31 +385,9 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) vblk->disk->driverfs_dev = &vdev->dev; index++; - if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH)) { - /* - * If the FLUSH feature is supported we do have support for - * flushing a volatile write cache on the host. Use that - * to implement write barrier support. - */ - blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH); - } else if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER)) { - /* - * If the BARRIER feature is supported the host expects us - * to order request by tags. This implies there is not - * volatile write cache on the host, and that the host - * never re-orders outstanding I/O. This feature is not - * useful for real life scenarious and deprecated. - */ - blk_queue_ordered(q, QUEUE_ORDERED_TAG); - } else { - /* - * If the FLUSH feature is not supported we must assume that - * the host does not perform any kind of volatile write - * caching. We still need to drain the queue to provider - * proper barrier semantics. 
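The virtio-blk conversion shows the pattern used throughout this series: instead of selecting a QUEUE_ORDERED_* mode via blk_queue_ordered(), a driver now declares what its write cache needs with blk_queue_flush(). A minimal sketch, assuming the REQ_FLUSH/REQ_FUA queue interface this series introduces; a device with no volatile write cache simply passes 0:

#include <linux/blkdev.h>

static void my_setup_flush(struct request_queue *q, bool has_wcache,
			   bool has_fua)
{
	unsigned int flush = 0;

	if (has_wcache)
		flush |= REQ_FLUSH;		/* cache can be flushed */
	if (has_wcache && has_fua)
		flush |= REQ_FUA;		/* forced-unit-access writes */
	blk_queue_flush(q, flush);
}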
- */ - blk_queue_ordered(q, QUEUE_ORDERED_DRAIN); - } + /* configure queue flush support */ + if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH)) + blk_queue_flush(q, REQ_FLUSH); /* If disk is read-only in the host, the guest should obey */ if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) @@ -531,9 +506,9 @@ static const struct virtio_device_id id_table[] = { }; static unsigned int features[] = { - VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, - VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, - VIRTIO_BLK_F_SCSI, VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY + VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, + VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI, + VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY }; /* diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index ab735a6..f2ffc46 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -95,7 +95,7 @@ struct blkfront_info struct gnttab_free_callback callback; struct blk_shadow shadow[BLK_RING_SIZE]; unsigned long shadow_free; - int feature_barrier; + unsigned int feature_flush; int is_ready; }; @@ -418,26 +418,12 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) } -static int xlvbd_barrier(struct blkfront_info *info) +static void xlvbd_flush(struct blkfront_info *info) { - int err; - const char *barrier; - - switch (info->feature_barrier) { - case QUEUE_ORDERED_DRAIN: barrier = "enabled (drain)"; break; - case QUEUE_ORDERED_TAG: barrier = "enabled (tag)"; break; - case QUEUE_ORDERED_NONE: barrier = "disabled"; break; - default: return -EINVAL; - } - - err = blk_queue_ordered(info->rq, info->feature_barrier); - - if (err) - return err; - + blk_queue_flush(info->rq, info->feature_flush); printk(KERN_INFO "blkfront: %s: barriers %s\n", - info->gd->disk_name, barrier); - return 0; + info->gd->disk_name, + info->feature_flush ? "enabled" : "disabled"); } @@ -516,7 +502,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, info->rq = gd->queue; info->gd = gd; - xlvbd_barrier(info); + xlvbd_flush(info); if (vdisk_info & VDISK_READONLY) set_disk_ro(gd, 1); @@ -662,8 +648,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) printk(KERN_WARNING "blkfront: %s: write barrier op failed\n", info->gd->disk_name); error = -EOPNOTSUPP; - info->feature_barrier = QUEUE_ORDERED_NONE; - xlvbd_barrier(info); + info->feature_flush = 0; + xlvbd_flush(info); } /* fall through */ case BLKIF_OP_READ: @@ -1076,20 +1062,13 @@ static void blkfront_connect(struct blkfront_info *info) /* * If there's no "feature-barrier" defined, then it means * we're dealing with a very old backend which writes - * synchronously; draining will do what needs to get done. + * synchronously; nothing to do. * - * If there are barriers, then we can do full queued writes - * with tagged barriers. - * - * If barriers are not supported, then there's no much we can - * do, so just set ordering to NONE. + * If there are barriers, then we use flush. 
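Seen from a bio-based driver, the two flags have simple semantics, which is what the loop conversion earlier in this section implements with vfs_fsync(): REQ_FLUSH means previously completed writes must be made stable before this request proceeds, and REQ_FUA means this request's own data must be stable before it completes. A sketch under those assumptions; my_send_data() is a hypothetical stand-in for the driver's data path:

#include <linux/bio.h>
#include <linux/fs.h>

static int my_send_data(struct file *file, struct bio *bio); /* hypothetical */

static int my_write_bio(struct file *file, struct bio *bio)
{
	int ret = 0;

	if (bio->bi_rw & REQ_FLUSH)
		ret = vfs_fsync(file, 0);	/* drain the backing file first */
	if (!ret)
		ret = my_send_data(file, bio);
	if (!ret && (bio->bi_rw & REQ_FUA))
		ret = vfs_fsync(file, 0);	/* then make this write stable */
	return ret;
}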
*/ - if (err) - info->feature_barrier = QUEUE_ORDERED_DRAIN; - else if (barrier) - info->feature_barrier = QUEUE_ORDERED_TAG; - else - info->feature_barrier = QUEUE_ORDERED_NONE; + info->feature_flush = 0; + if (!err && barrier) + info->feature_flush = REQ_FLUSH; err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); if (err) { diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index 710af89..eab58db 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -12,6 +12,7 @@ #include #include "agp.h" #include "intel-agp.h" +#include #include "intel-gtt.c" @@ -815,11 +816,19 @@ static const struct intel_driver_description { "HD Graphics", NULL, &intel_i965_driver }, { PCI_DEVICE_ID_INTEL_IRONLAKE_MC2_HB, PCI_DEVICE_ID_INTEL_IRONLAKE_M_IG, "HD Graphics", NULL, &intel_i965_driver }, - { PCI_DEVICE_ID_INTEL_SANDYBRIDGE_HB, PCI_DEVICE_ID_INTEL_SANDYBRIDGE_IG, + { PCI_DEVICE_ID_INTEL_SANDYBRIDGE_HB, PCI_DEVICE_ID_INTEL_SANDYBRIDGE_GT1_IG, "Sandybridge", NULL, &intel_gen6_driver }, - { PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_HB, PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_IG, + { PCI_DEVICE_ID_INTEL_SANDYBRIDGE_HB, PCI_DEVICE_ID_INTEL_SANDYBRIDGE_GT2_IG, "Sandybridge", NULL, &intel_gen6_driver }, - { PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_HB, PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_D0_IG, + { PCI_DEVICE_ID_INTEL_SANDYBRIDGE_HB, PCI_DEVICE_ID_INTEL_SANDYBRIDGE_GT2_PLUS_IG, + "Sandybridge", NULL, &intel_gen6_driver }, + { PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_HB, PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_GT1_IG, + "Sandybridge", NULL, &intel_gen6_driver }, + { PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_HB, PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_GT2_IG, + "Sandybridge", NULL, &intel_gen6_driver }, + { PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_HB, PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_GT2_PLUS_IG, + "Sandybridge", NULL, &intel_gen6_driver }, + { PCI_DEVICE_ID_INTEL_SANDYBRIDGE_S_HB, PCI_DEVICE_ID_INTEL_SANDYBRIDGE_S_IG, "Sandybridge", NULL, &intel_gen6_driver }, { 0, 0, NULL, NULL, NULL } }; @@ -1044,6 +1053,7 @@ static struct pci_device_id agp_intel_pci_table[] = { ID(PCI_DEVICE_ID_INTEL_IRONLAKE_MC2_HB), ID(PCI_DEVICE_ID_INTEL_SANDYBRIDGE_HB), ID(PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_HB), + ID(PCI_DEVICE_ID_INTEL_SANDYBRIDGE_S_HB), { } }; diff --git a/drivers/char/agp/intel-agp.h b/drivers/char/agp/intel-agp.h index 08d4753..ee189c7 100644 --- a/drivers/char/agp/intel-agp.h +++ b/drivers/char/agp/intel-agp.h @@ -1,6 +1,8 @@ /* * Common Intel AGPGART and GTT definitions. 
*/ +#ifndef _INTEL_AGP_H +#define _INTEL_AGP_H /* Intel registers */ #define INTEL_APSIZE 0xb4 @@ -200,11 +202,16 @@ #define PCI_DEVICE_ID_INTEL_IRONLAKE_MA_HB 0x0062 #define PCI_DEVICE_ID_INTEL_IRONLAKE_MC2_HB 0x006a #define PCI_DEVICE_ID_INTEL_IRONLAKE_M_IG 0x0046 -#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_HB 0x0100 -#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_IG 0x0102 -#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_HB 0x0104 -#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_IG 0x0106 -#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_D0_IG 0x0126 +#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_HB 0x0100 /* Desktop */ +#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_GT1_IG 0x0102 +#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_GT2_IG 0x0112 +#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_GT2_PLUS_IG 0x0122 +#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_HB 0x0104 /* Mobile */ +#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_GT1_IG 0x0106 +#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_GT2_IG 0x0116 +#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_GT2_PLUS_IG 0x0126 +#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_S_HB 0x0108 /* Server */ +#define PCI_DEVICE_ID_INTEL_SANDYBRIDGE_S_IG 0x010A /* cover 915 and 945 variants */ #define IS_I915 (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_E7221_HB || \ @@ -231,7 +238,8 @@ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_PINEVIEW_HB) #define IS_SNB (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_SANDYBRIDGE_HB || \ - agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_HB) + agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_HB || \ + agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_SANDYBRIDGE_S_HB) #define IS_G4X (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_EAGLELAKE_HB || \ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_Q45_HB || \ @@ -244,3 +252,5 @@ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IRONLAKE_MA_HB || \ agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IRONLAKE_MC2_HB || \ IS_SNB) + +#endif diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index d22ffb8..75e0a34 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -49,6 +49,26 @@ static struct gatt_mask intel_i810_masks[] = .type = INTEL_AGP_CACHED_MEMORY} }; +#define INTEL_AGP_UNCACHED_MEMORY 0 +#define INTEL_AGP_CACHED_MEMORY_LLC 1 +#define INTEL_AGP_CACHED_MEMORY_LLC_GFDT 2 +#define INTEL_AGP_CACHED_MEMORY_LLC_MLC 3 +#define INTEL_AGP_CACHED_MEMORY_LLC_MLC_GFDT 4 + +static struct gatt_mask intel_gen6_masks[] = +{ + {.mask = I810_PTE_VALID | GEN6_PTE_UNCACHED, + .type = INTEL_AGP_UNCACHED_MEMORY }, + {.mask = I810_PTE_VALID | GEN6_PTE_LLC, + .type = INTEL_AGP_CACHED_MEMORY_LLC }, + {.mask = I810_PTE_VALID | GEN6_PTE_LLC | GEN6_PTE_GFDT, + .type = INTEL_AGP_CACHED_MEMORY_LLC_GFDT }, + {.mask = I810_PTE_VALID | GEN6_PTE_LLC_MLC, + .type = INTEL_AGP_CACHED_MEMORY_LLC_MLC }, + {.mask = I810_PTE_VALID | GEN6_PTE_LLC_MLC | GEN6_PTE_GFDT, + .type = INTEL_AGP_CACHED_MEMORY_LLC_MLC_GFDT }, +}; + static struct _intel_private { struct pci_dev *pcidev; /* device one */ u8 __iomem *registers; @@ -178,13 +198,6 @@ static void intel_agp_insert_sg_entries(struct agp_memory *mem, off_t pg_start, int mask_type) { int i, j; - u32 cache_bits = 0; - - if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_SANDYBRIDGE_HB || - agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_HB) - { - cache_bits = GEN6_PTE_LLC_MLC; - } for (i = 0, j = pg_start; i < mem->page_count; i++, j++) { writel(agp_bridge->driver->mask_memory(agp_bridge, @@ -317,6 +330,23 @@ static int intel_i830_type_to_mask_type(struct 
agp_bridge_data *bridge, return 0; } +static int intel_gen6_type_to_mask_type(struct agp_bridge_data *bridge, + int type) +{ + unsigned int type_mask = type & ~AGP_USER_CACHED_MEMORY_GFDT; + unsigned int gfdt = type & AGP_USER_CACHED_MEMORY_GFDT; + + if (type_mask == AGP_USER_UNCACHED_MEMORY) + return INTEL_AGP_UNCACHED_MEMORY; + else if (type_mask == AGP_USER_CACHED_MEMORY_LLC_MLC) + return gfdt ? INTEL_AGP_CACHED_MEMORY_LLC_MLC_GFDT : + INTEL_AGP_CACHED_MEMORY_LLC_MLC; + else /* set 'normal'/'cached' to LLC by default */ + return gfdt ? INTEL_AGP_CACHED_MEMORY_LLC_GFDT : + INTEL_AGP_CACHED_MEMORY_LLC; +} + + static int intel_i810_insert_entries(struct agp_memory *mem, off_t pg_start, int type) { @@ -588,8 +618,7 @@ static void intel_i830_init_gtt_entries(void) gtt_entries = 0; break; } - } else if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_SANDYBRIDGE_HB || - agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_HB) { + } else if (IS_SNB) { /* * SandyBridge has new memory control reg at 0x50.w */ @@ -1068,11 +1097,11 @@ static void intel_i9xx_setup_flush(void) intel_i915_setup_chipset_flush(); } - if (intel_private.ifp_resource.start) { + if (intel_private.ifp_resource.start) intel_private.i9xx_flush_page = ioremap_nocache(intel_private.ifp_resource.start, PAGE_SIZE); - if (!intel_private.i9xx_flush_page) - dev_info(&intel_private.pcidev->dev, "can't ioremap flush page - no chipset flushing"); - } + if (!intel_private.i9xx_flush_page) + dev_err(&intel_private.pcidev->dev, + "can't ioremap flush page - no chipset flushing\n"); } static int intel_i9xx_configure(void) @@ -1163,7 +1192,7 @@ static int intel_i915_insert_entries(struct agp_memory *mem, off_t pg_start, mask_type = agp_bridge->driver->agp_type_to_mask_type(agp_bridge, type); - if (mask_type != 0 && mask_type != AGP_PHYS_MEMORY && + if (!IS_SNB && mask_type != 0 && mask_type != AGP_PHYS_MEMORY && mask_type != INTEL_AGP_CACHED_MEMORY) goto out_err; @@ -1333,8 +1362,8 @@ static unsigned long intel_i965_mask_memory(struct agp_bridge_data *bridge, static unsigned long intel_gen6_mask_memory(struct agp_bridge_data *bridge, dma_addr_t addr, int type) { - /* Shift high bits down */ - addr |= (addr >> 28) & 0xff; + /* gen6 has bit11-4 for physical addr bit39-32 */ + addr |= (addr >> 28) & 0xff0; /* Type checking must be done elsewhere */ return addr | bridge->driver->masks[type].mask; @@ -1359,6 +1388,7 @@ static void intel_i965_get_gtt_range(int *gtt_offset, int *gtt_size) break; case PCI_DEVICE_ID_INTEL_SANDYBRIDGE_HB: case PCI_DEVICE_ID_INTEL_SANDYBRIDGE_M_HB: + case PCI_DEVICE_ID_INTEL_SANDYBRIDGE_S_HB: *gtt_offset = MB(2); pci_read_config_word(intel_private.pcidev, SNB_GMCH_CTRL, &snb_gmch_ctl); @@ -1563,7 +1593,7 @@ static const struct agp_bridge_driver intel_gen6_driver = { .fetch_size = intel_i9xx_fetch_size, .cleanup = intel_i915_cleanup, .mask_memory = intel_gen6_mask_memory, - .masks = intel_i810_masks, + .masks = intel_gen6_masks, .agp_enable = intel_i810_agp_enable, .cache_flush = global_cache_flush, .create_gatt_table = intel_i965_create_gatt_table, @@ -1576,7 +1606,7 @@ static const struct agp_bridge_driver intel_gen6_driver = { .agp_alloc_pages = agp_generic_alloc_pages, .agp_destroy_page = agp_generic_destroy_page, .agp_destroy_pages = agp_generic_destroy_pages, - .agp_type_to_mask_type = intel_i830_type_to_mask_type, + .agp_type_to_mask_type = intel_gen6_type_to_mask_type, .chipset_flush = intel_i915_chipset_flush, #ifdef USE_PCI_DMA_API .agp_map_page = intel_agp_map_page, diff --git 
a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c index 1acdb25..a3f5e38 100644 --- a/drivers/char/hw_random/n2-drv.c +++ b/drivers/char/hw_random/n2-drv.c @@ -387,7 +387,7 @@ static int n2rng_init_control(struct n2rng *np) static int n2rng_data_read(struct hwrng *rng, u32 *data) { - struct n2rng *np = rng->priv; + struct n2rng *np = (struct n2rng *) rng->priv; unsigned long ra = __pa(&np->test_data); int len; diff --git a/drivers/char/mem.c b/drivers/char/mem.c index a398ecd..1f528fa 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -788,10 +788,11 @@ static const struct file_operations zero_fops = { /* * capabilities for /dev/zero * - permits private mappings, "copies" are taken of the source of zeros + * - no writeback happens */ static struct backing_dev_info zero_bdi = { .name = "char/mem", - .capabilities = BDI_CAP_MAP_COPY, + .capabilities = BDI_CAP_MAP_COPY | BDI_CAP_NO_ACCT_AND_WRITEBACK, }; static const struct file_operations full_fops = { diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 949067a..613c852 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -355,7 +355,7 @@ struct tty_driver *tty_find_polling_driver(char *name, int *line) if (*stp == '\0') stp = NULL; - if (tty_line >= 0 && tty_line <= p->num && p->ops && + if (tty_line >= 0 && tty_line < p->num && p->ops && p->ops->poll_init && !p->ops->poll_init(p, tty_line, stp)) { res = tty_driver_kref_get(p); *line = tty_line; diff --git a/drivers/char/vt.c b/drivers/char/vt.c index 50590c7..281aada 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -906,22 +906,16 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc, * bottom of buffer */ old_origin += (old_rows - new_rows) * old_row_size; - end = vc->vc_scr_end; } else { /* * Cursor is in no man's land, copy 1/2 screenful * from the top and bottom of cursor position */ old_origin += (vc->vc_y - new_rows/2) * old_row_size; - end = old_origin + (old_row_size * new_rows); } - } else - /* - * Cursor near the top, copy contents from the top of buffer - */ - end = (old_rows > new_rows) ? old_origin + - (old_row_size * new_rows) : - vc->vc_scr_end; + } + + end = old_origin + old_row_size * min(old_rows, new_rows); update_attr(vc); @@ -3075,8 +3069,7 @@ static int bind_con_driver(const struct consw *csw, int first, int last, old_was_color = vc->vc_can_do_color; vc->vc_sw->con_deinit(vc); - if (!vc->vc_origin) - vc->vc_origin = (unsigned long)vc->vc_screenbuf; + vc->vc_origin = (unsigned long)vc->vc_screenbuf; visual_init(vc, i, 0); set_origin(vc); update_attr(vc); diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c index 2bbeaae..38df8c1 100644 --- a/drivers/char/vt_ioctl.c +++ b/drivers/char/vt_ioctl.c @@ -533,11 +533,14 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, case KIOCSOUND: if (!perm) goto eperm; - /* FIXME: This is an old broken API but we need to keep it - supported and somehow separate the historic advertised - tick rate from any real one */ + /* + * The use of PIT_TICK_RATE is historic, it used to be + * the platform-dependent CLOCK_TICK_RATE between 2.6.12 + * and 2.6.36, which was a minor but unfortunate ABI + * change. + */ if (arg) - arg = CLOCK_TICK_RATE / arg; + arg = PIT_TICK_RATE / arg; kd_mksound(arg, 0); break; @@ -553,11 +556,8 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, */ ticks = HZ * ((arg >> 16) & 0xffff) / 1000; count = ticks ? 
(arg & 0xffff) : 0; - /* FIXME: This is an old broken API but we need to keep it - supported and somehow separate the historic advertised - tick rate from any real one */ if (count) - count = CLOCK_TICK_RATE / count; + count = PIT_TICK_RATE / count; kd_mksound(count, ticks); break; } diff --git a/drivers/dca/dca-core.c b/drivers/dca/dca-core.c index 8661c84..b98c676 100644 --- a/drivers/dca/dca-core.c +++ b/drivers/dca/dca-core.c @@ -39,6 +39,10 @@ static DEFINE_SPINLOCK(dca_lock); static LIST_HEAD(dca_domains); +static BLOCKING_NOTIFIER_HEAD(dca_provider_chain); + +static int dca_providers_blocked; + static struct pci_bus *dca_pci_rc_from_dev(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); @@ -70,6 +74,60 @@ static void dca_free_domain(struct dca_domain *domain) kfree(domain); } +static int dca_provider_ioat_ver_3_0(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + return ((pdev->vendor == PCI_VENDOR_ID_INTEL) && + ((pdev->device == PCI_DEVICE_ID_INTEL_IOAT_TBG0) || + (pdev->device == PCI_DEVICE_ID_INTEL_IOAT_TBG1) || + (pdev->device == PCI_DEVICE_ID_INTEL_IOAT_TBG2) || + (pdev->device == PCI_DEVICE_ID_INTEL_IOAT_TBG3) || + (pdev->device == PCI_DEVICE_ID_INTEL_IOAT_TBG4) || + (pdev->device == PCI_DEVICE_ID_INTEL_IOAT_TBG5) || + (pdev->device == PCI_DEVICE_ID_INTEL_IOAT_TBG6) || + (pdev->device == PCI_DEVICE_ID_INTEL_IOAT_TBG7))); +} + +static void unregister_dca_providers(void) +{ + struct dca_provider *dca, *_dca; + struct list_head unregistered_providers; + struct dca_domain *domain; + unsigned long flags; + + blocking_notifier_call_chain(&dca_provider_chain, + DCA_PROVIDER_REMOVE, NULL); + + INIT_LIST_HEAD(&unregistered_providers); + + spin_lock_irqsave(&dca_lock, flags); + + if (list_empty(&dca_domains)) { + spin_unlock_irqrestore(&dca_lock, flags); + return; + } + + /* at this point only one domain in the list is expected */ + domain = list_first_entry(&dca_domains, struct dca_domain, node); + if (!domain) + return; + + list_for_each_entry_safe(dca, _dca, &domain->dca_providers, node) { + list_del(&dca->node); + list_add(&dca->node, &unregistered_providers); + } + + dca_free_domain(domain); + + spin_unlock_irqrestore(&dca_lock, flags); + + list_for_each_entry_safe(dca, _dca, &unregistered_providers, node) { + dca_sysfs_remove_provider(dca); + list_del(&dca->node); + } +} + static struct dca_domain *dca_find_domain(struct pci_bus *rc) { struct dca_domain *domain; @@ -90,9 +148,13 @@ static struct dca_domain *dca_get_domain(struct device *dev) domain = dca_find_domain(rc); if (!domain) { - domain = dca_allocate_domain(rc); - if (domain) - list_add(&domain->node, &dca_domains); + if (dca_provider_ioat_ver_3_0(dev) && !list_empty(&dca_domains)) { + dca_providers_blocked = 1; + } else { + domain = dca_allocate_domain(rc); + if (domain) + list_add(&domain->node, &dca_domains); + } } return domain; @@ -293,8 +355,6 @@ void free_dca_provider(struct dca_provider *dca) } EXPORT_SYMBOL_GPL(free_dca_provider); -static BLOCKING_NOTIFIER_HEAD(dca_provider_chain); - /** * register_dca_provider - register a dca provider * @dca - struct created by alloc_dca_provider() @@ -306,6 +366,13 @@ int register_dca_provider(struct dca_provider *dca, struct device *dev) unsigned long flags; struct dca_domain *domain; + spin_lock_irqsave(&dca_lock, flags); + if (dca_providers_blocked) { + spin_unlock_irqrestore(&dca_lock, flags); + return -ENODEV; + } + spin_unlock_irqrestore(&dca_lock, flags); + err = dca_sysfs_add_provider(dca, dev); if (err) return err; @@ -313,7 +380,13 @@ 
int register_dca_provider(struct dca_provider *dca, struct device *dev) spin_lock_irqsave(&dca_lock, flags); domain = dca_get_domain(dev); if (!domain) { - spin_unlock_irqrestore(&dca_lock, flags); + if (dca_providers_blocked) { + spin_unlock_irqrestore(&dca_lock, flags); + dca_sysfs_remove_provider(dca); + unregister_dca_providers(); + } else { + spin_unlock_irqrestore(&dca_lock, flags); + } return -ENODEV; } list_add(&dca->node, &domain->dca_providers); diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index be29b0b..1b05896 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -263,6 +263,7 @@ static const struct { {PCI_VENDOR_ID_JMICRON, PCI_DEVICE_ID_JMICRON_JMB38X_FW, QUIRK_NO_MSI}, {PCI_VENDOR_ID_NEC, PCI_ANY_ID, QUIRK_CYCLE_TIMER}, {PCI_VENDOR_ID_VIA, PCI_ANY_ID, QUIRK_CYCLE_TIMER}, + {PCI_VENDOR_ID_RICOH, PCI_ANY_ID, QUIRK_CYCLE_TIMER}, {PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_UNI_N_FW, QUIRK_BE_HEADERS}, }; diff --git a/drivers/gpio/sx150x.c b/drivers/gpio/sx150x.c index b42f42c..823559a 100644 --- a/drivers/gpio/sx150x.c +++ b/drivers/gpio/sx150x.c @@ -459,17 +459,33 @@ static int sx150x_init_io(struct sx150x_chip *chip, u8 base, u16 cfg) return err; } -static int sx150x_init_hw(struct sx150x_chip *chip, - struct sx150x_platform_data *pdata) +static int sx150x_reset(struct sx150x_chip *chip) { - int err = 0; + int err; - err = i2c_smbus_write_word_data(chip->client, + err = i2c_smbus_write_byte_data(chip->client, chip->dev_cfg->reg_reset, - 0x3412); + 0x12); if (err < 0) return err; + err = i2c_smbus_write_byte_data(chip->client, + chip->dev_cfg->reg_reset, + 0x34); + return err; +} + +static int sx150x_init_hw(struct sx150x_chip *chip, + struct sx150x_platform_data *pdata) +{ + int err = 0; + + if (pdata->reset_during_probe) { + err = sx150x_reset(chip); + if (err < 0) + return err; + } + err = sx150x_i2c_write(chip->client, chip->dev_cfg->reg_misc, 0x01); diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index 7e31d43..dcbeb98 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -34,6 +34,9 @@ #include "drm_crtc_helper.h" #include "drm_fb_helper.h" +static bool drm_kms_helper_poll = true; +module_param_named(poll, drm_kms_helper_poll, bool, 0600); + static void drm_mode_validate_flag(struct drm_connector *connector, int flags) { @@ -99,8 +102,10 @@ int drm_helper_probe_single_connector_modes(struct drm_connector *connector, connector->status = connector_status_disconnected; if (connector->funcs->force) connector->funcs->force(connector); - } else - connector->status = connector->funcs->detect(connector); + } else { + connector->status = connector->funcs->detect(connector, true); + drm_kms_helper_poll_enable(dev); + } if (connector->status == connector_status_disconnected) { DRM_DEBUG_KMS("[CONNECTOR:%d:%s] disconnected\n", @@ -110,11 +115,10 @@ int drm_helper_probe_single_connector_modes(struct drm_connector *connector, } count = (*connector_funcs->get_modes)(connector); - if (!count) { + if (count == 0 && connector->status == connector_status_connected) count = drm_add_modes_noedid(connector, 1024, 768); - if (!count) - return 0; - } + if (count == 0) + goto prune; drm_mode_connector_list_update(connector); @@ -633,13 +637,13 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) mode_changed = true; if (mode_changed) { - old_fb = set->crtc->fb; - set->crtc->fb = set->fb; set->crtc->enabled = (set->mode != NULL); if (set->mode != NULL) { DRM_DEBUG_KMS("attempting to set 
mode from" " userspace\n"); drm_mode_debug_printmodeline(set->mode); + old_fb = set->crtc->fb; + set->crtc->fb = set->fb; if (!drm_crtc_helper_set_mode(set->crtc, set->mode, set->x, set->y, old_fb)) { @@ -840,6 +844,9 @@ static void output_poll_execute(struct work_struct *work) enum drm_connector_status old_status, status; bool repoll = false, changed = false; + if (!drm_kms_helper_poll) + return; + mutex_lock(&dev->mode_config.mutex); list_for_each_entry(connector, &dev->mode_config.connector_list, head) { @@ -859,7 +866,7 @@ static void output_poll_execute(struct work_struct *work) !(connector->polled & DRM_CONNECTOR_POLL_HPD)) continue; - status = connector->funcs->detect(connector); + status = connector->funcs->detect(connector, false); if (old_status != status) changed = true; } @@ -890,6 +897,9 @@ void drm_kms_helper_poll_enable(struct drm_device *dev) bool poll = false; struct drm_connector *connector; + if (!dev->mode_config.poll_enabled || !drm_kms_helper_poll) + return; + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { if (connector->polled) poll = true; @@ -919,8 +929,10 @@ void drm_helper_hpd_irq_event(struct drm_device *dev) { if (!dev->mode_config.poll_enabled) return; + /* kill timer and schedule immediate execution, this doesn't block */ cancel_delayed_work(&dev->mode_config.output_poll_work); - queue_delayed_work(system_nrt_wq, &dev->mode_config.output_poll_work, 0); + if (drm_kms_helper_poll) + queue_delayed_work(system_nrt_wq, &dev->mode_config.output_poll_work, 0); } EXPORT_SYMBOL(drm_helper_hpd_irq_event); diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c index e20f78b..f5bd9e5 100644 --- a/drivers/gpu/drm/drm_pci.c +++ b/drivers/gpu/drm/drm_pci.c @@ -164,6 +164,8 @@ int drm_get_pci_dev(struct pci_dev *pdev, const struct pci_device_id *ent, dev->hose = pdev->sysdata; #endif + mutex_lock(&drm_global_mutex); + if ((ret = drm_fill_in_dev(dev, ent, driver))) { printk(KERN_ERR "DRM: Fill_in_dev failed.\n"); goto err_g2; @@ -199,6 +201,7 @@ int drm_get_pci_dev(struct pci_dev *pdev, const struct pci_device_id *ent, driver->name, driver->major, driver->minor, driver->patchlevel, driver->date, pci_name(pdev), dev->primary->index); + mutex_unlock(&drm_global_mutex); return 0; err_g4: @@ -210,6 +213,7 @@ err_g2: pci_disable_device(pdev); err_g1: kfree(dev); + mutex_unlock(&drm_global_mutex); return ret; } EXPORT_SYMBOL(drm_get_pci_dev); diff --git a/drivers/gpu/drm/drm_platform.c b/drivers/gpu/drm/drm_platform.c index 460e9a3..92d1d0f 100644 --- a/drivers/gpu/drm/drm_platform.c +++ b/drivers/gpu/drm/drm_platform.c @@ -53,6 +53,8 @@ int drm_get_platform_dev(struct platform_device *platdev, dev->platformdev = platdev; dev->dev = &platdev->dev; + mutex_lock(&drm_global_mutex); + ret = drm_fill_in_dev(dev, NULL, driver); if (ret) { @@ -87,6 +89,8 @@ int drm_get_platform_dev(struct platform_device *platdev, list_add_tail(&dev->driver_item, &driver->device_list); + mutex_unlock(&drm_global_mutex); + DRM_INFO("Initialized %s %d.%d.%d %s on minor %d\n", driver->name, driver->major, driver->minor, driver->patchlevel, driver->date, dev->primary->index); @@ -100,6 +104,7 @@ err_g2: drm_put_minor(&dev->control); err_g1: kfree(dev); + mutex_unlock(&drm_global_mutex); return ret; } EXPORT_SYMBOL(drm_get_platform_dev); diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index 86118a7..85da4c4 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -159,7 +159,7 @@ static ssize_t status_show(struct device *device, 
struct drm_connector *connector = to_drm_connector(device); enum drm_connector_status status; - status = connector->funcs->detect(connector); + status = connector->funcs->detect(connector, true); return snprintf(buf, PAGE_SIZE, "%s\n", drm_get_connector_status_name(status)); } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 92d5605..5e43d70 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -31,6 +31,7 @@ #include #include "drmP.h" #include "drm.h" +#include "intel_drv.h" #include "i915_drm.h" #include "i915_drv.h" @@ -121,6 +122,54 @@ static int i915_gem_object_list_info(struct seq_file *m, void *data) return 0; } +static int i915_gem_pageflip_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + unsigned long flags; + struct intel_crtc *crtc; + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) { + const char *pipe = crtc->pipe ? "B" : "A"; + const char *plane = crtc->plane ? "B" : "A"; + struct intel_unpin_work *work; + + spin_lock_irqsave(&dev->event_lock, flags); + work = crtc->unpin_work; + if (work == NULL) { + seq_printf(m, "No flip due on pipe %s (plane %s)\n", + pipe, plane); + } else { + if (!work->pending) { + seq_printf(m, "Flip queued on pipe %s (plane %s)\n", + pipe, plane); + } else { + seq_printf(m, "Flip pending (waiting for vsync) on pipe %s (plane %s)\n", + pipe, plane); + } + if (work->enable_stall_check) + seq_printf(m, "Stall check enabled, "); + else + seq_printf(m, "Stall check waiting for page flip ioctl, "); + seq_printf(m, "%d prepares\n", work->pending); + + if (work->old_fb_obj) { + struct drm_i915_gem_object *obj_priv = to_intel_bo(work->old_fb_obj); + if(obj_priv) + seq_printf(m, "Old framebuffer gtt_offset 0x%08x\n", obj_priv->gtt_offset ); + } + if (work->pending_flip_obj) { + struct drm_i915_gem_object *obj_priv = to_intel_bo(work->pending_flip_obj); + if(obj_priv) + seq_printf(m, "New framebuffer gtt_offset 0x%08x\n", obj_priv->gtt_offset ); + } + } + spin_unlock_irqrestore(&dev->event_lock, flags); + } + + return 0; +} + static int i915_gem_request_info(struct seq_file *m, void *data) { struct drm_info_node *node = (struct drm_info_node *) m->private; @@ -777,6 +826,7 @@ static struct drm_info_list i915_debugfs_list[] = { {"i915_gem_active", i915_gem_object_list_info, 0, (void *) ACTIVE_LIST}, {"i915_gem_flushing", i915_gem_object_list_info, 0, (void *) FLUSHING_LIST}, {"i915_gem_inactive", i915_gem_object_list_info, 0, (void *) INACTIVE_LIST}, + {"i915_gem_pageflip", i915_gem_pageflip_info, 0}, {"i915_gem_request", i915_gem_request_info, 0}, {"i915_gem_seqno", i915_gem_seqno_info, 0}, {"i915_gem_fence_regs", i915_gem_fence_regs_info, 0}, diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index a7ec93e..9d67b48 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -620,8 +620,10 @@ static int i915_batchbuffer(struct drm_device *dev, void *data, ret = copy_from_user(cliprects, batch->cliprects, batch->num_cliprects * sizeof(struct drm_clip_rect)); - if (ret != 0) + if (ret != 0) { + ret = -EFAULT; goto fail_free; + } } mutex_lock(&dev->struct_mutex); @@ -662,8 +664,10 @@ static int i915_cmdbuffer(struct drm_device *dev, void *data, return -ENOMEM; ret = copy_from_user(batch_data, cmdbuf->buf, cmdbuf->sz); - if (ret != 0) + if (ret != 0) { + ret = -EFAULT; goto fail_batch_free; + } if 
(cmdbuf->num_cliprects) { cliprects = kcalloc(cmdbuf->num_cliprects, @@ -676,8 +680,10 @@ static int i915_cmdbuffer(struct drm_device *dev, void *data, ret = copy_from_user(cliprects, cmdbuf->cliprects, cmdbuf->num_cliprects * sizeof(struct drm_clip_rect)); - if (ret != 0) + if (ret != 0) { + ret = -EFAULT; goto fail_clip_free; + } } mutex_lock(&dev->struct_mutex); @@ -885,7 +891,7 @@ intel_alloc_mchbar_resource(struct drm_device *dev) int reg = IS_I965G(dev) ? MCHBAR_I965 : MCHBAR_I915; u32 temp_lo, temp_hi = 0; u64 mchbar_addr; - int ret = 0; + int ret; if (IS_I965G(dev)) pci_read_config_dword(dev_priv->bridge_dev, reg + 4, &temp_hi); @@ -895,22 +901,23 @@ intel_alloc_mchbar_resource(struct drm_device *dev) /* If ACPI doesn't have it, assume we need to allocate it ourselves */ #ifdef CONFIG_PNP if (mchbar_addr && - pnp_range_reserved(mchbar_addr, mchbar_addr + MCHBAR_SIZE)) { - ret = 0; - goto out; - } + pnp_range_reserved(mchbar_addr, mchbar_addr + MCHBAR_SIZE)) + return 0; #endif /* Get some space for it */ - ret = pci_bus_alloc_resource(dev_priv->bridge_dev->bus, &dev_priv->mch_res, + dev_priv->mch_res.name = "i915 MCHBAR"; + dev_priv->mch_res.flags = IORESOURCE_MEM; + ret = pci_bus_alloc_resource(dev_priv->bridge_dev->bus, + &dev_priv->mch_res, MCHBAR_SIZE, MCHBAR_SIZE, PCIBIOS_MIN_MEM, - 0, pcibios_align_resource, + 0, pcibios_align_resource, dev_priv->bridge_dev); if (ret) { DRM_DEBUG_DRIVER("failed bus alloc: %d\n", ret); dev_priv->mch_res.start = 0; - goto out; + return ret; } if (IS_I965G(dev)) @@ -919,8 +926,7 @@ intel_alloc_mchbar_resource(struct drm_device *dev) pci_write_config_dword(dev_priv->bridge_dev, reg, lower_32_bits(dev_priv->mch_res.start)); -out: - return ret; + return 0; } /* Setup MCHBAR if possible, return true if we should disable it again */ @@ -2082,6 +2088,10 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) goto free_priv; } + /* overlay on gen2 is broken and can't address above 1G */ + if (IS_GEN2(dev)) + dma_set_coherent_mask(&dev->pdev->dev, DMA_BIT_MASK(30)); + dev_priv->regs = ioremap(base, size); if (!dev_priv->regs) { DRM_ERROR("failed to map registers\n"); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 00befce..216deb5 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -61,91 +61,86 @@ extern int intel_agp_enabled; .driver_data = (unsigned long) info } static const struct intel_device_info intel_i830_info = { - .is_i8xx = 1, .is_mobile = 1, .cursor_needs_physical = 1, + .gen = 2, .is_i8xx = 1, .is_mobile = 1, .cursor_needs_physical = 1, }; static const struct intel_device_info intel_845g_info = { - .is_i8xx = 1, + .gen = 2, .is_i8xx = 1, }; static const struct intel_device_info intel_i85x_info = { - .is_i8xx = 1, .is_i85x = 1, .is_mobile = 1, + .gen = 2, .is_i8xx = 1, .is_i85x = 1, .is_mobile = 1, .cursor_needs_physical = 1, }; static const struct intel_device_info intel_i865g_info = { - .is_i8xx = 1, + .gen = 2, .is_i8xx = 1, }; static const struct intel_device_info intel_i915g_info = { - .is_i915g = 1, .is_i9xx = 1, .cursor_needs_physical = 1, + .gen = 3, .is_i915g = 1, .is_i9xx = 1, .cursor_needs_physical = 1, }; static const struct intel_device_info intel_i915gm_info = { - .is_i9xx = 1, .is_mobile = 1, + .gen = 3, .is_i9xx = 1, .is_mobile = 1, .cursor_needs_physical = 1, }; static const struct intel_device_info intel_i945g_info = { - .is_i9xx = 1, .has_hotplug = 1, .cursor_needs_physical = 1, + .gen = 3, .is_i9xx = 1, .has_hotplug = 1, 
.cursor_needs_physical = 1, }; static const struct intel_device_info intel_i945gm_info = { - .is_i945gm = 1, .is_i9xx = 1, .is_mobile = 1, + .gen = 3, .is_i945gm = 1, .is_i9xx = 1, .is_mobile = 1, .has_hotplug = 1, .cursor_needs_physical = 1, }; static const struct intel_device_info intel_i965g_info = { - .is_broadwater = 1, .is_i965g = 1, .is_i9xx = 1, .has_hotplug = 1, + .gen = 4, .is_broadwater = 1, .is_i965g = 1, .is_i9xx = 1, + .has_hotplug = 1, }; static const struct intel_device_info intel_i965gm_info = { - .is_crestline = 1, .is_i965g = 1, .is_i965gm = 1, .is_i9xx = 1, - .is_mobile = 1, .has_fbc = 1, .has_rc6 = 1, - .has_hotplug = 1, + .gen = 4, .is_crestline = 1, .is_i965g = 1, .is_i965gm = 1, .is_i9xx = 1, + .is_mobile = 1, .has_fbc = 1, .has_rc6 = 1, .has_hotplug = 1, }; static const struct intel_device_info intel_g33_info = { - .is_g33 = 1, .is_i9xx = 1, .need_gfx_hws = 1, - .has_hotplug = 1, + .gen = 3, .is_g33 = 1, .is_i9xx = 1, + .need_gfx_hws = 1, .has_hotplug = 1, }; static const struct intel_device_info intel_g45_info = { - .is_i965g = 1, .is_g4x = 1, .is_i9xx = 1, .need_gfx_hws = 1, - .has_pipe_cxsr = 1, - .has_hotplug = 1, + .gen = 4, .is_i965g = 1, .is_g4x = 1, .is_i9xx = 1, .need_gfx_hws = 1, + .has_pipe_cxsr = 1, .has_hotplug = 1, }; static const struct intel_device_info intel_gm45_info = { - .is_i965g = 1, .is_g4x = 1, .is_i9xx = 1, + .gen = 4, .is_i965g = 1, .is_g4x = 1, .is_i9xx = 1, .is_mobile = 1, .need_gfx_hws = 1, .has_fbc = 1, .has_rc6 = 1, - .has_pipe_cxsr = 1, - .has_hotplug = 1, + .has_pipe_cxsr = 1, .has_hotplug = 1, }; static const struct intel_device_info intel_pineview_info = { - .is_g33 = 1, .is_pineview = 1, .is_mobile = 1, .is_i9xx = 1, - .need_gfx_hws = 1, - .has_hotplug = 1, + .gen = 3, .is_g33 = 1, .is_pineview = 1, .is_mobile = 1, .is_i9xx = 1, + .need_gfx_hws = 1, .has_hotplug = 1, }; static const struct intel_device_info intel_ironlake_d_info = { - .is_ironlake = 1, .is_i965g = 1, .is_i9xx = 1, .need_gfx_hws = 1, - .has_pipe_cxsr = 1, - .has_hotplug = 1, + .gen = 5, .is_ironlake = 1, .is_i965g = 1, .is_i9xx = 1, + .need_gfx_hws = 1, .has_pipe_cxsr = 1, .has_hotplug = 1, }; static const struct intel_device_info intel_ironlake_m_info = { - .is_ironlake = 1, .is_mobile = 1, .is_i965g = 1, .is_i9xx = 1, - .need_gfx_hws = 1, .has_fbc = 1, .has_rc6 = 1, - .has_hotplug = 1, + .gen = 5, .is_ironlake = 1, .is_mobile = 1, .is_i965g = 1, .is_i9xx = 1, + .need_gfx_hws = 1, .has_fbc = 1, .has_rc6 = 1, .has_hotplug = 1, }; static const struct intel_device_info intel_sandybridge_d_info = { - .is_i965g = 1, .is_i9xx = 1, .need_gfx_hws = 1, - .has_hotplug = 1, .is_gen6 = 1, + .gen = 6, .is_i965g = 1, .is_i9xx = 1, + .need_gfx_hws = 1, .has_hotplug = 1, }; static const struct intel_device_info intel_sandybridge_m_info = { - .is_i965g = 1, .is_mobile = 1, .is_i9xx = 1, .need_gfx_hws = 1, - .has_hotplug = 1, .is_gen6 = 1, + .gen = 6, .is_i965g = 1, .is_mobile = 1, .is_i9xx = 1, + .need_gfx_hws = 1, .has_hotplug = 1, }; static const struct pci_device_id pciidlist[] = { /* aka */ @@ -180,8 +175,12 @@ static const struct pci_device_id pciidlist[] = { /* aka */ INTEL_VGA_DEVICE(0x0042, &intel_ironlake_d_info), INTEL_VGA_DEVICE(0x0046, &intel_ironlake_m_info), INTEL_VGA_DEVICE(0x0102, &intel_sandybridge_d_info), + INTEL_VGA_DEVICE(0x0112, &intel_sandybridge_d_info), + INTEL_VGA_DEVICE(0x0122, &intel_sandybridge_d_info), INTEL_VGA_DEVICE(0x0106, &intel_sandybridge_m_info), + INTEL_VGA_DEVICE(0x0116, &intel_sandybridge_m_info), INTEL_VGA_DEVICE(0x0126, 
&intel_sandybridge_m_info), + INTEL_VGA_DEVICE(0x010A, &intel_sandybridge_d_info), {0, 0, 0} }; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 047cd7c..af4a263 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -191,6 +191,7 @@ struct drm_i915_display_funcs { }; struct intel_device_info { + u8 gen; u8 is_mobile : 1; u8 is_i8xx : 1; u8 is_i85x : 1; @@ -206,7 +207,6 @@ struct intel_device_info { u8 is_broadwater : 1; u8 is_crestline : 1; u8 is_ironlake : 1; - u8 is_gen6 : 1; u8 has_fbc : 1; u8 has_rc6 : 1; u8 has_pipe_cxsr : 1; @@ -1162,7 +1162,6 @@ extern void intel_overlay_print_error_state(struct seq_file *m, struct intel_ove #define IS_845G(dev) ((dev)->pci_device == 0x2562) #define IS_I85X(dev) (INTEL_INFO(dev)->is_i85x) #define IS_I865G(dev) ((dev)->pci_device == 0x2572) -#define IS_GEN2(dev) (INTEL_INFO(dev)->is_i8xx) #define IS_I915G(dev) (INTEL_INFO(dev)->is_i915g) #define IS_I915GM(dev) ((dev)->pci_device == 0x2592) #define IS_I945G(dev) ((dev)->pci_device == 0x2772) @@ -1181,27 +1180,13 @@ extern void intel_overlay_print_error_state(struct seq_file *m, struct intel_ove #define IS_IRONLAKE_M(dev) ((dev)->pci_device == 0x0046) #define IS_IRONLAKE(dev) (INTEL_INFO(dev)->is_ironlake) #define IS_I9XX(dev) (INTEL_INFO(dev)->is_i9xx) -#define IS_GEN6(dev) (INTEL_INFO(dev)->is_gen6) #define IS_MOBILE(dev) (INTEL_INFO(dev)->is_mobile) -#define IS_GEN3(dev) (IS_I915G(dev) || \ - IS_I915GM(dev) || \ - IS_I945G(dev) || \ - IS_I945GM(dev) || \ - IS_G33(dev) || \ - IS_PINEVIEW(dev)) -#define IS_GEN4(dev) ((dev)->pci_device == 0x2972 || \ - (dev)->pci_device == 0x2982 || \ - (dev)->pci_device == 0x2992 || \ - (dev)->pci_device == 0x29A2 || \ - (dev)->pci_device == 0x2A02 || \ - (dev)->pci_device == 0x2A12 || \ - (dev)->pci_device == 0x2E02 || \ - (dev)->pci_device == 0x2E12 || \ - (dev)->pci_device == 0x2E22 || \ - (dev)->pci_device == 0x2E32 || \ - (dev)->pci_device == 0x2A42 || \ - (dev)->pci_device == 0x2E42) +#define IS_GEN2(dev) (INTEL_INFO(dev)->gen == 2) +#define IS_GEN3(dev) (INTEL_INFO(dev)->gen == 3) +#define IS_GEN4(dev) (INTEL_INFO(dev)->gen == 4) +#define IS_GEN5(dev) (INTEL_INFO(dev)->gen == 5) +#define IS_GEN6(dev) (INTEL_INFO(dev)->gen == 6) #define HAS_BSD(dev) (IS_IRONLAKE(dev) || IS_G4X(dev)) #define I915_NEED_GFX_HWS(dev) (INTEL_INFO(dev)->need_gfx_hws) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index df5a713..16fca1d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -34,6 +34,7 @@ #include #include #include +#include static uint32_t i915_gem_get_gtt_alignment(struct drm_gem_object *obj); static int i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj); @@ -135,12 +136,15 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, return -ENOMEM; ret = drm_gem_handle_create(file_priv, obj, &handle); - drm_gem_object_unreference_unlocked(obj); - if (ret) + if (ret) { + drm_gem_object_unreference_unlocked(obj); return ret; + } - args->handle = handle; + /* Sink the floating reference from kref_init(handlecount) */ + drm_gem_object_handle_unreference_unlocked(obj); + args->handle = handle; return 0; } @@ -3585,6 +3589,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (ret != 0) { DRM_ERROR("copy %d cliprects failed: %d\n", args->num_cliprects, ret); + ret = -EFAULT; goto pre_mutex_err; } } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 16861b8..744225e 100644 --- 
a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -887,6 +887,49 @@ static void i915_handle_error(struct drm_device *dev, bool wedged) queue_work(dev_priv->wq, &dev_priv->error_work); } +static void i915_pageflip_stall_check(struct drm_device *dev, int pipe) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct drm_i915_gem_object *obj_priv; + struct intel_unpin_work *work; + unsigned long flags; + bool stall_detected; + + /* Ignore early vblank irqs */ + if (intel_crtc == NULL) + return; + + spin_lock_irqsave(&dev->event_lock, flags); + work = intel_crtc->unpin_work; + + if (work == NULL || work->pending || !work->enable_stall_check) { + /* Either the pending flip IRQ arrived, or we're too early. Don't check */ + spin_unlock_irqrestore(&dev->event_lock, flags); + return; + } + + /* Potential stall - if we see that the flip has happened, assume a missed interrupt */ + obj_priv = to_intel_bo(work->pending_flip_obj); + if(IS_I965G(dev)) { + int dspsurf = intel_crtc->plane == 0 ? DSPASURF : DSPBSURF; + stall_detected = I915_READ(dspsurf) == obj_priv->gtt_offset; + } else { + int dspaddr = intel_crtc->plane == 0 ? DSPAADDR : DSPBADDR; + stall_detected = I915_READ(dspaddr) == (obj_priv->gtt_offset + + crtc->y * crtc->fb->pitch + + crtc->x * crtc->fb->bits_per_pixel/8); + } + + spin_unlock_irqrestore(&dev->event_lock, flags); + + if (stall_detected) { + DRM_DEBUG_DRIVER("Pageflip stall detected\n"); + intel_prepare_page_flip(dev, intel_crtc->plane); + } +} + irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) { struct drm_device *dev = (struct drm_device *) arg; @@ -1004,15 +1047,19 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) if (pipea_stats & vblank_status) { vblank++; drm_handle_vblank(dev, 0); - if (!dev_priv->flip_pending_is_done) + if (!dev_priv->flip_pending_is_done) { + i915_pageflip_stall_check(dev, 0); intel_finish_page_flip(dev, 0); + } } if (pipeb_stats & vblank_status) { vblank++; drm_handle_vblank(dev, 1); - if (!dev_priv->flip_pending_is_done) + if (!dev_priv->flip_pending_is_done) { + i915_pageflip_stall_check(dev, 1); intel_finish_page_flip(dev, 1); + } } if ((pipea_stats & PIPE_LEGACY_BLC_EVENT_STATUS) || @@ -1303,17 +1350,25 @@ void i915_hangcheck_elapsed(unsigned long data) i915_seqno_passed(i915_get_gem_seqno(dev, &dev_priv->render_ring), i915_get_tail_request(dev)->seqno)) { + bool missed_wakeup = false; + dev_priv->hangcheck_count = 0; /* Issue a wake-up to catch stuck h/w. */ - if (dev_priv->render_ring.waiting_gem_seqno | - dev_priv->bsd_ring.waiting_gem_seqno) { - DRM_ERROR("Hangcheck timer elapsed... GPU idle, missed IRQ.\n"); - if (dev_priv->render_ring.waiting_gem_seqno) - DRM_WAKEUP(&dev_priv->render_ring.irq_queue); - if (dev_priv->bsd_ring.waiting_gem_seqno) - DRM_WAKEUP(&dev_priv->bsd_ring.irq_queue); + if (dev_priv->render_ring.waiting_gem_seqno && + waitqueue_active(&dev_priv->render_ring.irq_queue)) { + DRM_WAKEUP(&dev_priv->render_ring.irq_queue); + missed_wakeup = true; } + + if (dev_priv->bsd_ring.waiting_gem_seqno && + waitqueue_active(&dev_priv->bsd_ring.irq_queue)) { + DRM_WAKEUP(&dev_priv->bsd_ring.irq_queue); + missed_wakeup = true; + } + + if (missed_wakeup) + DRM_ERROR("Hangcheck timer elapsed... 
GPU idle, missed IRQ.\n"); return; } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 67e3ec1..4f5e155 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -319,6 +319,7 @@ #define MI_MODE 0x0209c # define VS_TIMER_DISPATCH (1 << 6) +# define MI_FLUSH_ENABLE (1 << 11) #define SCPD0 0x0209c /* 915+ only */ #define IER 0x020a0 @@ -2205,9 +2206,17 @@ #define WM1_LP_SR_EN (1<<31) #define WM1_LP_LATENCY_SHIFT 24 #define WM1_LP_LATENCY_MASK (0x7f<<24) +#define WM1_LP_FBC_LP1_MASK (0xf<<20) +#define WM1_LP_FBC_LP1_SHIFT 20 #define WM1_LP_SR_MASK (0x1ff<<8) #define WM1_LP_SR_SHIFT 8 #define WM1_LP_CURSOR_MASK (0x3f) +#define WM2_LP_ILK 0x4510c +#define WM2_LP_EN (1<<31) +#define WM3_LP_ILK 0x45110 +#define WM3_LP_EN (1<<31) +#define WM1S_LP_ILK 0x45120 +#define WM1S_LP_EN (1<<31) /* Memory latency timer register */ #define MLTR_ILK 0x11222 diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 4b77351..a02a8df 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -400,7 +400,8 @@ intel_crt_load_detect(struct drm_crtc *crtc, struct intel_encoder *intel_encoder return status; } -static enum drm_connector_status intel_crt_detect(struct drm_connector *connector) +static enum drm_connector_status +intel_crt_detect(struct drm_connector *connector, bool force) { struct drm_device *dev = connector->dev; struct drm_encoder *encoder = intel_attached_encoder(connector); @@ -419,6 +420,9 @@ static enum drm_connector_status intel_crt_detect(struct drm_connector *connecto if (intel_crt_detect_ddc(encoder)) return connector_status_connected; + if (!force) + return connector->status; + /* for pre-945g platforms use load detect */ if (encoder->crtc && encoder->crtc->enabled) { status = intel_crt_load_detect(encoder->crtc, intel_encoder); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 11a3394..19daead 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -990,6 +990,22 @@ void intel_wait_for_vblank(struct drm_device *dev, int pipe) struct drm_i915_private *dev_priv = dev->dev_private; int pipestat_reg = (pipe == 0 ? PIPEASTAT : PIPEBSTAT); + /* Clear existing vblank status. Note this will clear any other + * sticky status fields as well. + * + * This races with i915_driver_irq_handler() with the result + * that either function could miss a vblank event. Here it is not + * fatal, as we will either wait upon the next vblank interrupt or + * timeout. Generally speaking intel_wait_for_vblank() is only + * called during modeset at which time the GPU should be idle and + * should *not* be performing page flips and thus not waiting on + * vblanks... + * Currently, the result of us stealing a vblank from the irq + * handler is that a single frame will be skipped during swapbuffers. 
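
The fix above depends on the pipe status bits being write-one-to-clear: writing the vblank status bit back rearms detection, so the subsequent wait only completes on a fresh vblank. The same clear-then-poll idiom in isolation, as a sketch (the register layout and event bit are assumed for illustration):

    #include <linux/io.h>
    #include <linux/jiffies.h>
    #include <linux/errno.h>

    static int wait_for_event(void __iomem *status_reg, u32 event)
    {
            unsigned long timeout = jiffies + msecs_to_jiffies(50);

            /* Clear any stale, sticky status so only a new event counts. */
            writel(readl(status_reg) | event, status_reg);

            while (!(readl(status_reg) & event)) {
                    if (time_after(jiffies, timeout))
                            return -ETIMEDOUT;
                    cpu_relax();
            }
            return 0;
    }
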
+ */ + I915_WRITE(pipestat_reg, + I915_READ(pipestat_reg) | PIPE_VBLANK_INTERRUPT_STATUS); + /* Wait for vblank interrupt bit to set */ if (wait_for((I915_READ(pipestat_reg) & PIPE_VBLANK_INTERRUPT_STATUS), @@ -1486,7 +1502,7 @@ intel_pipe_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb, dspcntr &= ~DISPPLANE_TILED; } - if (IS_IRONLAKE(dev)) + if (HAS_PCH_SPLIT(dev)) /* must disable */ dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; @@ -1495,20 +1511,19 @@ intel_pipe_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb, Start = obj_priv->gtt_offset; Offset = y * fb->pitch + x * (fb->bits_per_pixel / 8); - DRM_DEBUG("Writing base %08lX %08lX %d %d\n", Start, Offset, x, y); + DRM_DEBUG_KMS("Writing base %08lX %08lX %d %d %d\n", + Start, Offset, x, y, fb->pitch); I915_WRITE(dspstride, fb->pitch); if (IS_I965G(dev)) { - I915_WRITE(dspbase, Offset); - I915_READ(dspbase); I915_WRITE(dspsurf, Start); - I915_READ(dspsurf); I915_WRITE(dsptileoff, (y << 16) | x); + I915_WRITE(dspbase, Offset); } else { I915_WRITE(dspbase, Start + Offset); - I915_READ(dspbase); } + POSTING_READ(dspbase); - if ((IS_I965G(dev) || plane == 0)) + if (IS_I965G(dev) || plane == 0) intel_update_fbc(crtc, &crtc->mode); intel_wait_for_vblank(dev, intel_crtc->pipe); @@ -1522,7 +1537,6 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, struct drm_framebuffer *old_fb) { struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_master_private *master_priv; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_framebuffer *intel_fb; @@ -1530,13 +1544,6 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, struct drm_gem_object *obj; int pipe = intel_crtc->pipe; int plane = intel_crtc->plane; - unsigned long Start, Offset; - int dspbase = (plane == 0 ? DSPAADDR : DSPBADDR); - int dspsurf = (plane == 0 ? DSPASURF : DSPBSURF); - int dspstride = (plane == 0) ? DSPASTRIDE : DSPBSTRIDE; - int dsptileoff = (plane == 0 ? DSPATILEOFF : DSPBTILEOFF); - int dspcntr_reg = (plane == 0) ? 
DSPACNTR : DSPBCNTR; - u32 dspcntr; int ret; /* no fb bound */ @@ -1572,71 +1579,18 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, return ret; } - dspcntr = I915_READ(dspcntr_reg); - /* Mask out pixel format bits in case we change it */ - dspcntr &= ~DISPPLANE_PIXFORMAT_MASK; - switch (crtc->fb->bits_per_pixel) { - case 8: - dspcntr |= DISPPLANE_8BPP; - break; - case 16: - if (crtc->fb->depth == 15) - dspcntr |= DISPPLANE_15_16BPP; - else - dspcntr |= DISPPLANE_16BPP; - break; - case 24: - case 32: - if (crtc->fb->depth == 30) - dspcntr |= DISPPLANE_32BPP_30BIT_NO_ALPHA; - else - dspcntr |= DISPPLANE_32BPP_NO_ALPHA; - break; - default: - DRM_ERROR("Unknown color depth\n"); + ret = intel_pipe_set_base_atomic(crtc, crtc->fb, x, y); + if (ret) { i915_gem_object_unpin(obj); mutex_unlock(&dev->struct_mutex); - return -EINVAL; - } - if (IS_I965G(dev)) { - if (obj_priv->tiling_mode != I915_TILING_NONE) - dspcntr |= DISPPLANE_TILED; - else - dspcntr &= ~DISPPLANE_TILED; - } - - if (HAS_PCH_SPLIT(dev)) - /* must disable */ - dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; - - I915_WRITE(dspcntr_reg, dspcntr); - - Start = obj_priv->gtt_offset; - Offset = y * crtc->fb->pitch + x * (crtc->fb->bits_per_pixel / 8); - - DRM_DEBUG_KMS("Writing base %08lX %08lX %d %d %d\n", - Start, Offset, x, y, crtc->fb->pitch); - I915_WRITE(dspstride, crtc->fb->pitch); - if (IS_I965G(dev)) { - I915_WRITE(dspsurf, Start); - I915_WRITE(dsptileoff, (y << 16) | x); - I915_WRITE(dspbase, Offset); - } else { - I915_WRITE(dspbase, Start + Offset); + return ret; } - POSTING_READ(dspbase); - - if ((IS_I965G(dev) || plane == 0)) - intel_update_fbc(crtc, &crtc->mode); - - intel_wait_for_vblank(dev, pipe); if (old_fb) { intel_fb = to_intel_framebuffer(old_fb); obj_priv = to_intel_bo(intel_fb->obj); i915_gem_object_unpin(intel_fb->obj); } - intel_increase_pllclock(crtc, true); mutex_unlock(&dev->struct_mutex); @@ -1911,9 +1865,6 @@ static void ironlake_crtc_dpms(struct drm_crtc *crtc, int mode) int fdi_tx_reg = (pipe == 0) ? FDI_TXA_CTL : FDI_TXB_CTL; int fdi_rx_reg = (pipe == 0) ? FDI_RXA_CTL : FDI_RXB_CTL; int transconf_reg = (pipe == 0) ? TRANSACONF : TRANSBCONF; - int pf_ctl_reg = (pipe == 0) ? PFA_CTL_1 : PFB_CTL_1; - int pf_win_size = (pipe == 0) ? PFA_WIN_SZ : PFB_WIN_SZ; - int pf_win_pos = (pipe == 0) ? PFA_WIN_POS : PFB_WIN_POS; int cpu_htot_reg = (pipe == 0) ? HTOTAL_A : HTOTAL_B; int cpu_hblank_reg = (pipe == 0) ? HBLANK_A : HBLANK_B; int cpu_hsync_reg = (pipe == 0) ? HSYNC_A : HSYNC_B; @@ -1982,15 +1933,19 @@ static void ironlake_crtc_dpms(struct drm_crtc *crtc, int mode) } /* Enable panel fitting for LVDS */ - if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS) - || HAS_eDP || intel_pch_has_edp(crtc)) { - if (dev_priv->pch_pf_size) { - temp = I915_READ(pf_ctl_reg); - I915_WRITE(pf_ctl_reg, temp | PF_ENABLE | PF_FILTER_MED_3x3); - I915_WRITE(pf_win_pos, dev_priv->pch_pf_pos); - I915_WRITE(pf_win_size, dev_priv->pch_pf_size); - } else - I915_WRITE(pf_ctl_reg, temp & ~PF_ENABLE); + if (dev_priv->pch_pf_size && + (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS) + || HAS_eDP || intel_pch_has_edp(crtc))) { + /* Force use of hard-coded filter coefficients + * as some pre-programmed values are broken, + * e.g. x201. + */ + I915_WRITE(pipe ? PFB_CTL_1 : PFA_CTL_1, + PF_ENABLE | PF_FILTER_MED_3x3); + I915_WRITE(pipe ? PFB_WIN_POS : PFA_WIN_POS, + dev_priv->pch_pf_pos); + I915_WRITE(pipe ? 
PFB_WIN_SZ : PFA_WIN_SZ, + dev_priv->pch_pf_size); } /* Enable CPU pipe */ @@ -2115,7 +2070,7 @@ static void ironlake_crtc_dpms(struct drm_crtc *crtc, int mode) I915_WRITE(transconf_reg, temp | TRANS_ENABLE); I915_READ(transconf_reg); - if (wait_for(I915_READ(transconf_reg) & TRANS_STATE_ENABLE, 10, 0)) + if (wait_for(I915_READ(transconf_reg) & TRANS_STATE_ENABLE, 100, 1)) DRM_ERROR("failed to enable transcoder\n"); } @@ -2155,14 +2110,8 @@ static void ironlake_crtc_dpms(struct drm_crtc *crtc, int mode) udelay(100); /* Disable PF */ - temp = I915_READ(pf_ctl_reg); - if ((temp & PF_ENABLE) != 0) { - I915_WRITE(pf_ctl_reg, temp & ~PF_ENABLE); - I915_READ(pf_ctl_reg); - } - I915_WRITE(pf_win_size, 0); - POSTING_READ(pf_win_size); - + I915_WRITE(pipe ? PFB_CTL_1 : PFA_CTL_1, 0); + I915_WRITE(pipe ? PFB_WIN_SZ : PFA_WIN_SZ, 0); /* disable CPU FDI tx and PCH FDI rx */ temp = I915_READ(fdi_tx_reg); @@ -2421,6 +2370,9 @@ static void intel_crtc_dpms(struct drm_crtc *crtc, int mode) int pipe = intel_crtc->pipe; bool enabled; + if (intel_crtc->dpms_mode == mode) + return; + intel_crtc->dpms_mode = mode; intel_crtc->cursor_on = mode == DRM_MODE_DPMS_ON; @@ -2815,14 +2767,8 @@ static unsigned long intel_calculate_wm(unsigned long clock_in_khz, /* Don't promote wm_size to unsigned... */ if (wm_size > (long)wm->max_wm) wm_size = wm->max_wm; - if (wm_size <= 0) { + if (wm_size <= 0) wm_size = wm->default_wm; - DRM_ERROR("Insufficient FIFO for plane, expect flickering:" - " entries required = %ld, available = %lu.\n", - entries_required + wm->guard_size, - wm->fifo_size); - } - return wm_size; } @@ -3436,8 +3382,7 @@ static void ironlake_update_wm(struct drm_device *dev, int planea_clock, reg_value = I915_READ(WM1_LP_ILK); reg_value &= ~(WM1_LP_LATENCY_MASK | WM1_LP_SR_MASK | WM1_LP_CURSOR_MASK); - reg_value |= WM1_LP_SR_EN | - (ilk_sr_latency << WM1_LP_LATENCY_SHIFT) | + reg_value |= (ilk_sr_latency << WM1_LP_LATENCY_SHIFT) | (sr_wm << WM1_LP_SR_SHIFT) | cursor_wm; I915_WRITE(WM1_LP_ILK, reg_value); @@ -3554,10 +3499,9 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, u32 dpll = 0, fp = 0, fp2 = 0, dspcntr, pipeconf; bool ok, has_reduced_clock = false, is_sdvo = false, is_dvo = false; bool is_crt = false, is_lvds = false, is_tv = false, is_dp = false; - bool is_edp = false; + struct intel_encoder *has_edp_encoder = NULL; struct drm_mode_config *mode_config = &dev->mode_config; struct drm_encoder *encoder; - struct intel_encoder *intel_encoder = NULL; const intel_limit_t *limit; int ret; struct fdi_m_n m_n = {0}; @@ -3578,12 +3522,12 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, drm_vblank_pre_modeset(dev, pipe); list_for_each_entry(encoder, &mode_config->encoder_list, head) { + struct intel_encoder *intel_encoder; - if (!encoder || encoder->crtc != crtc) + if (encoder->crtc != crtc) continue; intel_encoder = enc_to_intel_encoder(encoder); - switch (intel_encoder->type) { case INTEL_OUTPUT_LVDS: is_lvds = true; @@ -3607,7 +3551,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, is_dp = true; break; case INTEL_OUTPUT_EDP: - is_edp = true; + has_edp_encoder = intel_encoder; break; } @@ -3685,10 +3629,10 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, int lane = 0, link_bw, bpp; /* eDP doesn't require FDI link, so just set DP M/N according to current link config */ - if (is_edp) { + if (has_edp_encoder) { target_clock = mode->clock; - intel_edp_link_config(intel_encoder, - &lane, &link_bw); + intel_edp_link_config(has_edp_encoder, + &lane, &link_bw); } else { /* DP over FDI 
requires target mode clock instead of link clock */ @@ -3709,7 +3653,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, temp |= PIPE_8BPC; else temp |= PIPE_6BPC; - } else if (is_edp || (is_dp && intel_pch_has_edp(crtc))) { + } else if (has_edp_encoder || (is_dp && intel_pch_has_edp(crtc))) { switch (dev_priv->edp_bpp/3) { case 8: temp |= PIPE_8BPC; @@ -3782,7 +3726,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, udelay(200); - if (is_edp) { + if (has_edp_encoder) { if (dev_priv->lvds_use_ssc) { temp |= DREF_SSC1_ENABLE; I915_WRITE(PCH_DREF_CONTROL, temp); @@ -3931,7 +3875,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, dpll_reg = pch_dpll_reg; } - if (!is_edp) { + if (!has_edp_encoder) { I915_WRITE(fp_reg, fp); I915_WRITE(dpll_reg, dpll & ~DPLL_VCO_ENABLE); I915_READ(dpll_reg); @@ -4026,7 +3970,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, } } - if (!is_edp) { + if (!has_edp_encoder) { I915_WRITE(fp_reg, fp); I915_WRITE(dpll_reg, dpll); I915_READ(dpll_reg); @@ -4105,7 +4049,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc, I915_WRITE(link_m1_reg, m_n.link_m); I915_WRITE(link_n1_reg, m_n.link_n); - if (is_edp) { + if (has_edp_encoder) { ironlake_set_pll_edp(crtc, adjusted_mode->clock); } else { /* enable FDI RX PLL too */ @@ -4911,15 +4855,6 @@ static void intel_crtc_destroy(struct drm_crtc *crtc) kfree(intel_crtc); } -struct intel_unpin_work { - struct work_struct work; - struct drm_device *dev; - struct drm_gem_object *old_fb_obj; - struct drm_gem_object *pending_flip_obj; - struct drm_pending_vblank_event *event; - int pending; -}; - static void intel_unpin_work_fn(struct work_struct *__work) { struct intel_unpin_work *work = @@ -5007,7 +4942,8 @@ void intel_prepare_page_flip(struct drm_device *dev, int plane) spin_lock_irqsave(&dev->event_lock, flags); if (intel_crtc->unpin_work) { - intel_crtc->unpin_work->pending = 1; + if ((++intel_crtc->unpin_work->pending) > 1) + DRM_ERROR("Prepared flip multiple times\n"); } else { DRM_DEBUG_DRIVER("preparing flip with no unpin work?\n"); } @@ -5026,9 +4962,9 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_unpin_work *work; unsigned long flags, offset; - int pipesrc_reg = (intel_crtc->pipe == 0) ? 
PIPEASRC : PIPEBSRC; - int ret, pipesrc; - u32 flip_mask; + int pipe = intel_crtc->pipe; + u32 pf, pipesrc; + int ret; work = kzalloc(sizeof *work, GFP_KERNEL); if (work == NULL) @@ -5077,42 +5013,73 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, atomic_inc(&obj_priv->pending_flip); work->pending_flip_obj = obj; - if (intel_crtc->plane) - flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; - else - flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; - if (IS_GEN3(dev) || IS_GEN2(dev)) { + u32 flip_mask; + + if (intel_crtc->plane) + flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; + else + flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; + BEGIN_LP_RING(2); OUT_RING(MI_WAIT_FOR_EVENT | flip_mask); OUT_RING(0); ADVANCE_LP_RING(); } + work->enable_stall_check = true; + /* Offset into the new buffer for cases of shared fbs between CRTCs */ - offset = obj_priv->gtt_offset; - offset += (crtc->y * fb->pitch) + (crtc->x * (fb->bits_per_pixel) / 8); + offset = crtc->y * fb->pitch + crtc->x * fb->bits_per_pixel/8; BEGIN_LP_RING(4); - if (IS_I965G(dev)) { + switch(INTEL_INFO(dev)->gen) { + case 2: OUT_RING(MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); OUT_RING(fb->pitch); - OUT_RING(offset | obj_priv->tiling_mode); - pipesrc = I915_READ(pipesrc_reg); - OUT_RING(pipesrc & 0x0fff0fff); - } else if (IS_GEN3(dev)) { + OUT_RING(obj_priv->gtt_offset + offset); + OUT_RING(MI_NOOP); + break; + + case 3: OUT_RING(MI_DISPLAY_FLIP_I915 | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); OUT_RING(fb->pitch); - OUT_RING(offset); + OUT_RING(obj_priv->gtt_offset + offset); OUT_RING(MI_NOOP); - } else { + break; + + case 4: + case 5: + /* i965+ uses the linear or tiled offsets from the + * Display Registers (which do not change across a page-flip) + * so we need only reprogram the base address. + */ OUT_RING(MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); OUT_RING(fb->pitch); - OUT_RING(offset); - OUT_RING(MI_NOOP); + OUT_RING(obj_priv->gtt_offset | obj_priv->tiling_mode); + + /* XXX Enabling the panel-fitter across page-flip is so far + * untested on non-native modes, so ignore it for now. + * pf = I915_READ(pipe == 0 ? PFA_CTL_1 : PFB_CTL_1) & PF_ENABLE; + */ + pf = 0; + pipesrc = I915_READ(pipe == 0 ? PIPEASRC : PIPEBSRC) & 0x0fff0fff; + OUT_RING(pf | pipesrc); + break; + + case 6: + OUT_RING(MI_DISPLAY_FLIP | + MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); + OUT_RING(fb->pitch | obj_priv->tiling_mode); + OUT_RING(obj_priv->gtt_offset); + + pf = I915_READ(pipe == 0 ? PFA_CTL_1 : PFB_CTL_1) & PF_ENABLE; + pipesrc = I915_READ(pipe == 0 ? 
PIPEASRC : PIPEBSRC) & 0x0fff0fff; + OUT_RING(pf | pipesrc); + break; } ADVANCE_LP_RING(); @@ -5193,7 +5160,7 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) dev_priv->pipe_to_crtc_mapping[intel_crtc->pipe] = &intel_crtc->base; intel_crtc->cursor_addr = 0; - intel_crtc->dpms_mode = DRM_MODE_DPMS_OFF; + intel_crtc->dpms_mode = -1; drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs); intel_crtc->busy = false; @@ -5701,6 +5668,9 @@ void intel_init_clock_gating(struct drm_device *dev) I915_WRITE(DISP_ARB_CTL, (I915_READ(DISP_ARB_CTL) | DISP_FBC_WM_DIS)); + I915_WRITE(WM3_LP_ILK, 0); + I915_WRITE(WM2_LP_ILK, 0); + I915_WRITE(WM1_LP_ILK, 0); } /* * Based on the document from hardware guys the following bits @@ -5722,8 +5692,7 @@ void intel_init_clock_gating(struct drm_device *dev) ILK_DPFC_DIS2 | ILK_CLK_FBC); } - if (IS_GEN6(dev)) - return; + return; } else if (IS_G4X(dev)) { uint32_t dspclk_gate; I915_WRITE(RENCLK_GATE_D1, 0); @@ -5784,11 +5753,9 @@ void intel_init_clock_gating(struct drm_device *dev) OUT_RING(MI_FLUSH); ADVANCE_LP_RING(); } - } else { + } else DRM_DEBUG_KMS("Failed to allocate render context." - "Disable RC6\n"); - return; - } + "Disable RC6\n"); } if (I915_HAS_RC6(dev) && drm_core_check_feature(dev, DRIVER_MODESET)) { diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 9caccd0..1a51ee0 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -239,7 +239,6 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, uint32_t ch_data = ch_ctl + 4; int i; int recv_bytes; - uint32_t ctl; uint32_t status; uint32_t aux_clock_divider; int try, precharge; @@ -263,41 +262,43 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, else precharge = 5; + if (I915_READ(ch_ctl) & DP_AUX_CH_CTL_SEND_BUSY) { + DRM_ERROR("dp_aux_ch not started status 0x%08x\n", + I915_READ(ch_ctl)); + return -EBUSY; + } + /* Must try at least 3 times according to DP spec */ for (try = 0; try < 5; try++) { /* Load the send data into the aux channel data registers */ - for (i = 0; i < send_bytes; i += 4) { - uint32_t d = pack_aux(send + i, send_bytes - i); - - I915_WRITE(ch_data + i, d); - } - - ctl = (DP_AUX_CH_CTL_SEND_BUSY | - DP_AUX_CH_CTL_TIME_OUT_400us | - (send_bytes << DP_AUX_CH_CTL_MESSAGE_SIZE_SHIFT) | - (precharge << DP_AUX_CH_CTL_PRECHARGE_2US_SHIFT) | - (aux_clock_divider << DP_AUX_CH_CTL_BIT_CLOCK_2X_SHIFT) | - DP_AUX_CH_CTL_DONE | - DP_AUX_CH_CTL_TIME_OUT_ERROR | - DP_AUX_CH_CTL_RECEIVE_ERROR); + for (i = 0; i < send_bytes; i += 4) + I915_WRITE(ch_data + i, + pack_aux(send + i, send_bytes - i)); /* Send the command and wait for it to complete */ - I915_WRITE(ch_ctl, ctl); - (void) I915_READ(ch_ctl); + I915_WRITE(ch_ctl, + DP_AUX_CH_CTL_SEND_BUSY | + DP_AUX_CH_CTL_TIME_OUT_400us | + (send_bytes << DP_AUX_CH_CTL_MESSAGE_SIZE_SHIFT) | + (precharge << DP_AUX_CH_CTL_PRECHARGE_2US_SHIFT) | + (aux_clock_divider << DP_AUX_CH_CTL_BIT_CLOCK_2X_SHIFT) | + DP_AUX_CH_CTL_DONE | + DP_AUX_CH_CTL_TIME_OUT_ERROR | + DP_AUX_CH_CTL_RECEIVE_ERROR); for (;;) { - udelay(100); status = I915_READ(ch_ctl); if ((status & DP_AUX_CH_CTL_SEND_BUSY) == 0) break; + udelay(100); } /* Clear done status and any errors */ - I915_WRITE(ch_ctl, (status | - DP_AUX_CH_CTL_DONE | - DP_AUX_CH_CTL_TIME_OUT_ERROR | - DP_AUX_CH_CTL_RECEIVE_ERROR)); - (void) I915_READ(ch_ctl); - if ((status & DP_AUX_CH_CTL_TIME_OUT_ERROR) == 0) + I915_WRITE(ch_ctl, + status | + DP_AUX_CH_CTL_DONE | + DP_AUX_CH_CTL_TIME_OUT_ERROR | + DP_AUX_CH_CTL_RECEIVE_ERROR); + if (status & 
DP_AUX_CH_CTL_DONE) break; } @@ -324,15 +325,12 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, /* Unload any bytes sent back from the other side */ recv_bytes = ((status & DP_AUX_CH_CTL_MESSAGE_SIZE_MASK) >> DP_AUX_CH_CTL_MESSAGE_SIZE_SHIFT); - if (recv_bytes > recv_size) recv_bytes = recv_size; - for (i = 0; i < recv_bytes; i += 4) { - uint32_t d = I915_READ(ch_data + i); - - unpack_aux(d, recv + i, recv_bytes - i); - } + for (i = 0; i < recv_bytes; i += 4) + unpack_aux(I915_READ(ch_data + i), + recv + i, recv_bytes - i); return recv_bytes; } @@ -1388,7 +1386,7 @@ ironlake_dp_detect(struct drm_connector *connector) * \return false if DP port is disconnected. */ static enum drm_connector_status -intel_dp_detect(struct drm_connector *connector) +intel_dp_detect(struct drm_connector *connector, bool force) { struct drm_encoder *encoder = intel_attached_encoder(connector); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 0e92aa0..ad312ca 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -176,6 +176,16 @@ struct intel_crtc { #define enc_to_intel_encoder(x) container_of(x, struct intel_encoder, enc) #define to_intel_framebuffer(x) container_of(x, struct intel_framebuffer, base) +struct intel_unpin_work { + struct work_struct work; + struct drm_device *dev; + struct drm_gem_object *old_fb_obj; + struct drm_gem_object *pending_flip_obj; + struct drm_pending_vblank_event *event; + int pending; + bool enable_stall_check; +}; + struct i2c_adapter *intel_i2c_create(struct drm_device *dev, const u32 reg, const char *name); void intel_i2c_destroy(struct i2c_adapter *adapter); diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index a399f4b..7c9ec14 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -221,7 +221,8 @@ static void intel_dvo_mode_set(struct drm_encoder *encoder, * * Unimplemented. */ -static enum drm_connector_status intel_dvo_detect(struct drm_connector *connector) +static enum drm_connector_status +intel_dvo_detect(struct drm_connector *connector, bool force) { struct drm_encoder *encoder = intel_attached_encoder(connector); struct intel_dvo *intel_dvo = enc_to_intel_dvo(encoder); diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index ccd4c97..926934a 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -139,7 +139,7 @@ static bool intel_hdmi_mode_fixup(struct drm_encoder *encoder, } static enum drm_connector_status -intel_hdmi_detect(struct drm_connector *connector) +intel_hdmi_detect(struct drm_connector *connector, bool force) { struct drm_encoder *encoder = intel_attached_encoder(connector); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index b819c10..6ec39a8 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -445,7 +445,8 @@ static void intel_lvds_mode_set(struct drm_encoder *encoder, * connected and closed means disconnected. We also send hotplug events as * needed, using lid status notification from the input layer. 
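
The rewritten AUX path in intel_dp_aux_ch() above is the standard DisplayPort AUX handshake: load the TX data registers, start the transaction with SEND_BUSY set, poll until the hardware clears it, then write the sticky status bits back to clear them. A minimal sketch of a single attempt, for illustration only, assuming plain readl/writel in place of the driver's I915_READ/I915_WRITE macros (pack_aux() and the DP_AUX_CH_CTL_* bits are the ones used in the hunk):

static int dp_aux_xfer_once(void __iomem *ch_ctl, void __iomem *ch_data,
			    const u8 *send, int send_bytes, u32 ctl_bits)
{
	u32 status;
	int i;

	/* Load up to 16 message bytes, four per TX data register. */
	for (i = 0; i < send_bytes; i += 4)
		writel(pack_aux(send + i, send_bytes - i), ch_data + i);

	/* Start the transaction; SEND_BUSY reads back as set while it runs. */
	writel(DP_AUX_CH_CTL_SEND_BUSY | ctl_bits, ch_ctl);
	do {
		status = readl(ch_ctl);
	} while (status & DP_AUX_CH_CTL_SEND_BUSY);

	/* DONE and the error bits are write-one-to-clear. */
	writel(status | DP_AUX_CH_CTL_DONE |
	       DP_AUX_CH_CTL_TIME_OUT_ERROR |
	       DP_AUX_CH_CTL_RECEIVE_ERROR, ch_ctl);

	return (status & DP_AUX_CH_CTL_DONE) ? 0 : -EIO;
}
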
*/ -static enum drm_connector_status intel_lvds_detect(struct drm_connector *connector) +static enum drm_connector_status +intel_lvds_detect(struct drm_connector *connector, bool force) { struct drm_device *dev = connector->dev; enum drm_connector_status status = connector_status_connected; @@ -540,7 +541,9 @@ static int intel_lid_notify(struct notifier_block *nb, unsigned long val, * the LID notification event. */ if (connector) - connector->status = connector->funcs->detect(connector); + connector->status = connector->funcs->detect(connector, + false); + /* Don't force modeset on machines where it causes a GPU lockup */ if (dmi_check_system(intel_no_modeset_on_lid)) return NOTIFY_OK; @@ -875,8 +878,6 @@ void intel_lvds_init(struct drm_device *dev) intel_encoder->clone_mask = (1 << INTEL_LVDS_CLONE_BIT); intel_encoder->crtc_mask = (1 << 1); - if (IS_I965G(dev)) - intel_encoder->crtc_mask |= (1 << 0); drm_encoder_helper_add(encoder, &intel_lvds_helper_funcs); drm_connector_helper_add(connector, &intel_lvds_connector_helper_funcs); connector->display_info.subpixel_order = SubPixelHorizontalRGB; diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 4f00390..1d306a4 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -25,6 +25,8 @@ * * Derived from Xorg ddx, xf86-video-intel, src/i830_video.c */ + +#include <linux/seq_file.h> #include "drmP.h" #include "drm.h" #include "i915_drm.h" diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 51e9c9e7..cb3508f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -220,9 +220,13 @@ static int init_render_ring(struct drm_device *dev, { drm_i915_private_t *dev_priv = dev->dev_private; int ret = init_ring_common(dev, ring); + int mode; + if (IS_I9XX(dev) && !IS_GEN3(dev)) { - I915_WRITE(MI_MODE, - (VS_TIMER_DISPATCH) << 16 | VS_TIMER_DISPATCH); + mode = VS_TIMER_DISPATCH << 16 | VS_TIMER_DISPATCH; + if (IS_GEN6(dev)) + mode |= MI_FLUSH_ENABLE << 16 | MI_FLUSH_ENABLE; + I915_WRITE(MI_MODE, mode); } return ret; } diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 093e914..e8e902d 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -1061,8 +1061,9 @@ static bool intel_sdvo_mode_fixup(struct drm_encoder *encoder, if (!intel_sdvo_set_output_timings_from_mode(intel_sdvo, mode)) return false; - if (!intel_sdvo_set_input_timings_for_mode(intel_sdvo, mode, adjusted_mode)) - return false; + (void) intel_sdvo_set_input_timings_for_mode(intel_sdvo, + mode, + adjusted_mode); } else if (intel_sdvo->is_lvds) { drm_mode_set_crtcinfo(intel_sdvo->sdvo_lvds_fixed_mode, 0); @@ -1070,8 +1071,9 @@ static bool intel_sdvo_mode_fixup(struct drm_encoder *encoder, intel_sdvo->sdvo_lvds_fixed_mode)) return false; - if (!intel_sdvo_set_input_timings_for_mode(intel_sdvo, mode, adjusted_mode)) - return false; + (void) intel_sdvo_set_input_timings_for_mode(intel_sdvo, + mode, + adjusted_mode); } /* Make the CRTC code factor in the SDVO pixel multiplier. 
The @@ -1108,10 +1110,9 @@ static void intel_sdvo_mode_set(struct drm_encoder *encoder, in_out.in0 = intel_sdvo->attached_output; in_out.in1 = 0; - if (!intel_sdvo_set_value(intel_sdvo, - SDVO_CMD_SET_IN_OUT_MAP, - &in_out, sizeof(in_out))) - return; + intel_sdvo_set_value(intel_sdvo, + SDVO_CMD_SET_IN_OUT_MAP, + &in_out, sizeof(in_out)); if (intel_sdvo->is_hdmi) { if (!intel_sdvo_set_avi_infoframe(intel_sdvo, mode)) @@ -1122,11 +1123,9 @@ static void intel_sdvo_mode_set(struct drm_encoder *encoder, /* We have tried to get input timing in mode_fixup, and filled into adjusted_mode */ - if (intel_sdvo->is_tv || intel_sdvo->is_lvds) { - intel_sdvo_get_dtd_from_mode(&input_dtd, adjusted_mode); + intel_sdvo_get_dtd_from_mode(&input_dtd, adjusted_mode); + if (intel_sdvo->is_tv || intel_sdvo->is_lvds) input_dtd.part2.sdvo_flags = intel_sdvo->sdvo_flags; - } else - intel_sdvo_get_dtd_from_mode(&input_dtd, mode); /* If it's a TV, we already set the output timing in mode_fixup. * Otherwise, the output timing is equal to the input timing. @@ -1137,8 +1136,7 @@ static void intel_sdvo_mode_set(struct drm_encoder *encoder, intel_sdvo->attached_output)) return; - if (!intel_sdvo_set_output_timing(intel_sdvo, &input_dtd)) - return; + (void) intel_sdvo_set_output_timing(intel_sdvo, &input_dtd); } /* Set the input timing to the screen. Assume always input 0. */ @@ -1165,8 +1163,7 @@ static void intel_sdvo_mode_set(struct drm_encoder *encoder, intel_sdvo_set_input_timing(encoder, &input_dtd); } #else - if (!intel_sdvo_set_input_timing(intel_sdvo, &input_dtd)) - return; + (void) intel_sdvo_set_input_timing(intel_sdvo, &input_dtd); #endif sdvo_pixel_multiply = intel_sdvo_get_pixel_multiplier(mode); @@ -1420,7 +1417,7 @@ intel_analog_is_connected(struct drm_device *dev) if (!analog_connector) return false; - if (analog_connector->funcs->detect(analog_connector) == + if (analog_connector->funcs->detect(analog_connector, false) == connector_status_disconnected) return false; @@ -1489,7 +1486,8 @@ intel_sdvo_hdmi_sink_detect(struct drm_connector *connector) return status; } -static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connector) +static enum drm_connector_status +intel_sdvo_detect(struct drm_connector *connector, bool force) { uint16_t response; struct drm_encoder *encoder = intel_attached_encoder(connector); @@ -1932,6 +1930,41 @@ static const struct drm_encoder_funcs intel_sdvo_enc_funcs = { .destroy = intel_sdvo_enc_destroy, }; +static void +intel_sdvo_guess_ddc_bus(struct intel_sdvo *sdvo) +{ + uint16_t mask = 0; + unsigned int num_bits; + + /* Make a mask of outputs less than or equal to our own priority in the + * list. + */ + switch (sdvo->controlled_output) { + case SDVO_OUTPUT_LVDS1: + mask |= SDVO_OUTPUT_LVDS1; + case SDVO_OUTPUT_LVDS0: + mask |= SDVO_OUTPUT_LVDS0; + case SDVO_OUTPUT_TMDS1: + mask |= SDVO_OUTPUT_TMDS1; + case SDVO_OUTPUT_TMDS0: + mask |= SDVO_OUTPUT_TMDS0; + case SDVO_OUTPUT_RGB1: + mask |= SDVO_OUTPUT_RGB1; + case SDVO_OUTPUT_RGB0: + mask |= SDVO_OUTPUT_RGB0; + break; + } + + /* Count bits to find what number we are in the priority list. */ + mask &= sdvo->caps.output_flags; + num_bits = hweight16(mask); + /* If more than 3 outputs, default to DDC bus 3 for now. 
*/ + if (num_bits > 3) + num_bits = 3; + + /* Corresponds to SDVO_CONTROL_BUS_DDCx */ + sdvo->ddc_bus = 1 << num_bits; +} /** * Choose the appropriate DDC bus for control bus switch command for this @@ -1951,7 +1984,10 @@ intel_sdvo_select_ddc_bus(struct drm_i915_private *dev_priv, else mapping = &(dev_priv->sdvo_mappings[1]); - sdvo->ddc_bus = 1 << ((mapping->ddc_pin & 0xf0) >> 4); + if (mapping->initialized) + sdvo->ddc_bus = 1 << ((mapping->ddc_pin & 0xf0) >> 4); + else + intel_sdvo_guess_ddc_bus(sdvo); } static bool diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index d2029ef..4a117e3 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1231,7 +1231,6 @@ intel_tv_detect_type (struct intel_tv *intel_tv) struct drm_encoder *encoder = &intel_tv->base.enc; struct drm_device *dev = encoder->dev; struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc); unsigned long irqflags; u32 tv_ctl, save_tv_ctl; u32 tv_dac, save_tv_dac; @@ -1268,11 +1267,15 @@ intel_tv_detect_type (struct intel_tv *intel_tv) DAC_C_0_7_V); I915_WRITE(TV_CTL, tv_ctl); I915_WRITE(TV_DAC, tv_dac); - intel_wait_for_vblank(dev, intel_crtc->pipe); + POSTING_READ(TV_DAC); + msleep(20); + tv_dac = I915_READ(TV_DAC); I915_WRITE(TV_DAC, save_tv_dac); I915_WRITE(TV_CTL, save_tv_ctl); - intel_wait_for_vblank(dev, intel_crtc->pipe); + POSTING_READ(TV_CTL); + msleep(20); + /* * A B C * 0 1 1 Composite @@ -1338,7 +1341,7 @@ static void intel_tv_find_better_format(struct drm_connector *connector) * we have a pipe programmed in order to probe the TV. */ static enum drm_connector_status -intel_tv_detect(struct drm_connector *connector) +intel_tv_detect(struct drm_connector *connector, bool force) { struct drm_display_mode mode; struct drm_encoder *encoder = intel_attached_encoder(connector); @@ -1350,7 +1353,7 @@ intel_tv_detect(struct drm_connector *connector) if (encoder->crtc && encoder->crtc->enabled) { type = intel_tv_detect_type(intel_tv); - } else { + } else if (force) { struct drm_crtc *crtc; int dpms_mode; @@ -1361,10 +1364,9 @@ intel_tv_detect(struct drm_connector *connector) intel_release_load_detect_pipe(&intel_tv->base, connector, dpms_mode); } else - type = -1; - } - - intel_tv->type = type; + return connector_status_unknown; + } else + return connector->status; if (type < 0) return connector_status_disconnected; diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index a1473ff..87186a4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -168,7 +168,7 @@ nouveau_connector_set_encoder(struct drm_connector *connector, } static enum drm_connector_status -nouveau_connector_detect(struct drm_connector *connector) +nouveau_connector_detect(struct drm_connector *connector, bool force) { struct drm_device *dev = connector->dev; struct nouveau_connector *nv_connector = nouveau_connector(connector); @@ -246,7 +246,7 @@ detect_analog: } static enum drm_connector_status -nouveau_connector_detect_lvds(struct drm_connector *connector) +nouveau_connector_detect_lvds(struct drm_connector *connector, bool force) { struct drm_device *dev = connector->dev; struct drm_nouveau_private *dev_priv = dev->dev_private; @@ -267,7 +267,7 @@ nouveau_connector_detect_lvds(struct drm_connector *connector) /* Try retrieving EDID via DDC */ if (!dev_priv->vbios.fp_no_ddc) { - status = nouveau_connector_detect(connector); + 
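
The intel_sdvo_guess_ddc_bus() fallback above relies on the deliberate switch fall-through: it accumulates a mask of every output at or below the controlled output's priority, intersects it with the outputs the device actually reports, and uses the population count (hweight16) as the DDC bus index. A worked example with hypothetical capability flags:

/* Hypothetical example for intel_sdvo_guess_ddc_bus():
 *
 *   sdvo->controlled_output = SDVO_OUTPUT_TMDS0
 *   sdvo->caps.output_flags = SDVO_OUTPUT_TMDS0 | SDVO_OUTPUT_RGB0
 *
 *   fall-through mask         = TMDS0 | RGB1 | RGB0   (TMDS0 and below)
 *   mask &= caps.output_flags -> TMDS0 | RGB0
 *   num_bits = hweight16(mask) = 2
 *   sdvo->ddc_bus = 1 << 2 = 0x04
 */
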
status = nouveau_connector_detect(connector, force); if (status == connector_status_connected) goto out; } diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 6b208ff..87ac21e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -64,16 +64,17 @@ nouveau_fence_update(struct nouveau_channel *chan) struct nouveau_fence *fence; uint32_t sequence; + spin_lock(&chan->fence.lock); + if (USE_REFCNT) sequence = nvchan_rd32(chan, 0x48); else sequence = atomic_read(&chan->fence.last_sequence_irq); if (chan->fence.sequence_ack == sequence) - return; + goto out; chan->fence.sequence_ack = sequence; - spin_lock(&chan->fence.lock); list_for_each_safe(entry, tmp, &chan->fence.pending) { fence = list_entry(entry, struct nouveau_fence, entry); @@ -85,6 +86,7 @@ nouveau_fence_update(struct nouveau_channel *chan) if (sequence == chan->fence.sequence_ack) break; } +out: spin_unlock(&chan->fence.lock); } diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 581c67c..ead7b8f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -245,7 +245,7 @@ validate_fini_list(struct list_head *list, struct nouveau_fence *fence) list_del(&nvbo->entry); nvbo->reserved_by = NULL; ttm_bo_unreserve(&nvbo->bo); - drm_gem_object_unreference(nvbo->gem); + drm_gem_object_unreference_unlocked(nvbo->gem); } } @@ -300,7 +300,7 @@ retry: validate_fini(op, NULL); if (ret == -EAGAIN) ret = ttm_bo_wait_unreserved(&nvbo->bo, false); - drm_gem_object_unreference(gem); + drm_gem_object_unreference_unlocked(gem); if (ret) { NV_ERROR(dev, "fail reserve\n"); return ret; @@ -616,8 +616,6 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, return PTR_ERR(bo); } - mutex_lock(&dev->struct_mutex); - /* Mark push buffers as being used on PFIFO, the validation code * will then make sure that if the pushbuf bo moves, that they * happen on the kernel channel, which will in turn cause a sync @@ -731,7 +729,6 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, out: validate_fini(&op, fence); nouveau_fence_unref((void**)&fence); - mutex_unlock(&dev->struct_mutex); kfree(bo); kfree(push); diff --git a/drivers/gpu/drm/nouveau/nv50_instmem.c b/drivers/gpu/drm/nouveau/nv50_instmem.c index c95bf9b..91ef93c 100644 --- a/drivers/gpu/drm/nouveau/nv50_instmem.c +++ b/drivers/gpu/drm/nouveau/nv50_instmem.c @@ -139,6 +139,8 @@ nv50_instmem_init(struct drm_device *dev) chan->file_priv = (struct drm_file *)-2; dev_priv->fifos[0] = dev_priv->fifos[127] = chan; + INIT_LIST_HEAD(&chan->ramht_refs); + /* Channel's PRAMIN object + heap */ ret = nouveau_gpuobj_new_fake(dev, 0, c_offset, c_size, 0, NULL, &chan->ramin); diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 577239a..cd0290f 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -332,6 +332,11 @@ static void atombios_crtc_set_timing(struct drm_crtc *crtc, args.usV_SyncWidth = cpu_to_le16(mode->crtc_vsync_end - mode->crtc_vsync_start); + args.ucOverscanRight = radeon_crtc->h_border; + args.ucOverscanLeft = radeon_crtc->h_border; + args.ucOverscanBottom = radeon_crtc->v_border; + args.ucOverscanTop = radeon_crtc->v_border; + if (mode->flags & DRM_MODE_FLAG_NVSYNC) misc |= ATOM_VSYNC_POLARITY; if (mode->flags & DRM_MODE_FLAG_NHSYNC) @@ -534,6 +539,21 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc, pll->algo = 
PLL_ALGO_LEGACY; pll->flags |= RADEON_PLL_PREFER_CLOSEST_LOWER; } + /* There is some evidence (often anecdotal) that RV515/RV620 LVDS + * (on some boards at least) prefers the legacy algo. I'm not + * sure whether this should be handled generically or on a + * case-by-case quirk basis. Both algos should work fine in the + * majority of cases. + */ + if ((radeon_encoder->active_device & (ATOM_DEVICE_LCD_SUPPORT)) && + ((rdev->family == CHIP_RV515) || + (rdev->family == CHIP_RV620))) { + /* allow the user to override just in case */ + if (radeon_new_pll == 1) + pll->algo = PLL_ALGO_NEW; + else + pll->algo = PLL_ALGO_LEGACY; + } } else { if (encoder->encoder_type != DRM_MODE_ENCODER_DAC) pll->flags |= RADEON_PLL_NO_ODD_POST_DIV; @@ -1056,11 +1076,11 @@ static int avivo_crtc_set_base(struct drm_crtc *crtc, int x, int y, if (rdev->family >= CHIP_RV770) { if (radeon_crtc->crtc_id) { - WREG32(R700_D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH, 0); - WREG32(R700_D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH, 0); + WREG32(R700_D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH, upper_32_bits(fb_location)); + WREG32(R700_D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH, upper_32_bits(fb_location)); } else { - WREG32(R700_D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH, 0); - WREG32(R700_D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH, 0); + WREG32(R700_D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH, upper_32_bits(fb_location)); + WREG32(R700_D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH, upper_32_bits(fb_location)); } } WREG32(AVIVO_D1GRPH_PRIMARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset, @@ -1197,8 +1217,18 @@ int atombios_crtc_mode_set(struct drm_crtc *crtc, struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct drm_device *dev = crtc->dev; struct radeon_device *rdev = dev->dev_private; + struct drm_encoder *encoder; + bool is_tvcv = false; - /* TODO color tiling */ + list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { + /* find tv std */ + if (encoder->crtc == crtc) { + struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); + if (radeon_encoder->active_device & + (ATOM_DEVICE_TV_SUPPORT | ATOM_DEVICE_CV_SUPPORT)) + is_tvcv = true; + } + } atombios_disable_ss(crtc); /* always set DCPLL */ @@ -1207,9 +1237,14 @@ int atombios_crtc_mode_set(struct drm_crtc *crtc, atombios_crtc_set_pll(crtc, adjusted_mode); atombios_enable_ss(crtc); - if (ASIC_IS_AVIVO(rdev)) + if (ASIC_IS_DCE4(rdev)) atombios_set_crtc_dtd_timing(crtc, adjusted_mode); - else { + else if (ASIC_IS_AVIVO(rdev)) { + if (is_tvcv) + atombios_crtc_set_timing(crtc, adjusted_mode); + else + atombios_set_crtc_dtd_timing(crtc, adjusted_mode); + } else { atombios_crtc_set_timing(crtc, adjusted_mode); if (radeon_crtc->crtc_id == 0) atombios_set_crtc_dtd_timing(crtc, adjusted_mode); diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 957d506..79082d4 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -675,6 +675,43 @@ static int evergreen_cp_load_microcode(struct radeon_device *rdev) return 0; } +static int evergreen_cp_start(struct radeon_device *rdev) +{ + int r; + uint32_t cp_me; + + r = radeon_ring_lock(rdev, 7); + if (r) { + DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); + return r; + } + radeon_ring_write(rdev, PACKET3(PACKET3_ME_INITIALIZE, 5)); + radeon_ring_write(rdev, 0x1); + radeon_ring_write(rdev, 0x0); + radeon_ring_write(rdev, rdev->config.evergreen.max_hw_contexts - 1); + radeon_ring_write(rdev, PACKET3_ME_INITIALIZE_DEVICE_ID(1)); + radeon_ring_write(rdev, 0); + radeon_ring_write(rdev, 0); + 
radeon_ring_unlock_commit(rdev); + + cp_me = 0xff; + WREG32(CP_ME_CNTL, cp_me); + + r = radeon_ring_lock(rdev, 4); + if (r) { + DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r); + return r; + } + /* init some VGT regs */ + radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + radeon_ring_write(rdev, (VGT_VERTEX_REUSE_BLOCK_CNTL - PACKET3_SET_CONTEXT_REG_START) >> 2); + radeon_ring_write(rdev, 0xe); + radeon_ring_write(rdev, 0x10); + radeon_ring_unlock_commit(rdev); + + return 0; +} + int evergreen_cp_resume(struct radeon_device *rdev) { u32 tmp; @@ -719,7 +756,7 @@ int evergreen_cp_resume(struct radeon_device *rdev) rdev->cp.rptr = RREG32(CP_RB_RPTR); rdev->cp.wptr = RREG32(CP_RB_WPTR); - r600_cp_start(rdev); + evergreen_cp_start(rdev); rdev->cp.ready = true; r = radeon_ring_test(rdev); if (r) { @@ -1123,14 +1160,25 @@ static void evergreen_gpu_init(struct radeon_device *rdev) EVERGREEN_MAX_BACKENDS_MASK)); break; } - } else - gb_backend_map = - evergreen_get_tile_pipe_to_backend_map(rdev, - rdev->config.evergreen.max_tile_pipes, - rdev->config.evergreen.max_backends, - ((EVERGREEN_MAX_BACKENDS_MASK << - rdev->config.evergreen.max_backends) & - EVERGREEN_MAX_BACKENDS_MASK)); + } else { + switch (rdev->family) { + case CHIP_CYPRESS: + case CHIP_HEMLOCK: + gb_backend_map = 0x66442200; + break; + case CHIP_JUNIPER: + gb_backend_map = 0x00006420; + break; + default: + gb_backend_map = + evergreen_get_tile_pipe_to_backend_map(rdev, + rdev->config.evergreen.max_tile_pipes, + rdev->config.evergreen.max_backends, + ((EVERGREEN_MAX_BACKENDS_MASK << + rdev->config.evergreen.max_backends) & + EVERGREEN_MAX_BACKENDS_MASK)); + } + } rdev->config.evergreen.tile_config = gb_addr_config; WREG32(GB_BACKEND_MAP, gb_backend_map); @@ -2054,11 +2102,6 @@ int evergreen_resume(struct radeon_device *rdev) */ /* post card */ atom_asic_init(rdev->mode_info.atom_context); - /* Initialize clocks */ - r = radeon_clocks_init(rdev); - if (r) { - return r; - } r = evergreen_startup(rdev); if (r) { @@ -2164,9 +2207,6 @@ int evergreen_init(struct radeon_device *rdev) radeon_surface_init(rdev); /* Initialize clocks */ radeon_get_clock_info(rdev->ddev); - r = radeon_clocks_init(rdev); - if (r) - return r; /* Fence driver */ r = radeon_fence_driver_init(rdev); if (r) @@ -2236,7 +2276,6 @@ void evergreen_fini(struct radeon_device *rdev) evergreen_pcie_gart_fini(rdev); radeon_gem_fini(rdev); radeon_fence_driver_fini(rdev); - radeon_clocks_fini(rdev); radeon_agp_fini(rdev); radeon_bo_fini(rdev); radeon_atombios_fini(rdev); diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index e817a0b..e151f16 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -2020,18 +2020,7 @@ bool r100_gpu_cp_is_lockup(struct radeon_device *rdev, struct r100_gpu_lockup *l return false; } elapsed = jiffies_to_msecs(cjiffies - lockup->last_jiffies); - if (elapsed >= 3000) { - /* very likely the improbable case where current - * rptr is equal to last recorded, a while ago, rptr - * this is more likely a false positive update tracking - * information which should force us to be recall at - * latter point - */ - lockup->last_cp_rptr = cp->rptr; - lockup->last_jiffies = jiffies; - return false; - } - if (elapsed >= 1000) { + if (elapsed >= 10000) { dev_err(rdev->dev, "GPU lockup CP stall for more than %lumsec\n", elapsed); return true; } @@ -3308,13 +3297,14 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) unsigned long size; unsigned prim_walk; unsigned 
nverts; + unsigned num_cb = track->num_cb; - for (i = 0; i < track->num_cb; i++) { + if (!track->zb_cb_clear && !track->color_channel_mask && + !track->blend_read_enable) + num_cb = 0; + + for (i = 0; i < num_cb; i++) { if (track->cb[i].robj == NULL) { - if (!(track->zb_cb_clear || track->color_channel_mask || - track->blend_read_enable)) { - continue; - } DRM_ERROR("[drm] No buffer for color buffer %d !\n", i); return -EINVAL; } diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index d0ebae9..afc18d8 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2119,10 +2119,7 @@ int r600_cp_start(struct radeon_device *rdev) } radeon_ring_write(rdev, PACKET3(PACKET3_ME_INITIALIZE, 5)); radeon_ring_write(rdev, 0x1); - if (rdev->family >= CHIP_CEDAR) { - radeon_ring_write(rdev, 0x0); - radeon_ring_write(rdev, rdev->config.evergreen.max_hw_contexts - 1); - } else if (rdev->family >= CHIP_RV770) { + if (rdev->family >= CHIP_RV770) { radeon_ring_write(rdev, 0x0); radeon_ring_write(rdev, rdev->config.rv770.max_hw_contexts - 1); } else { @@ -2489,11 +2486,6 @@ int r600_resume(struct radeon_device *rdev) */ /* post card */ atom_asic_init(rdev->mode_info.atom_context); - /* Initialize clocks */ - r = radeon_clocks_init(rdev); - if (r) { - return r; - } r = r600_startup(rdev); if (r) { @@ -2586,9 +2578,6 @@ int r600_init(struct radeon_device *rdev) radeon_surface_init(rdev); /* Initialize clocks */ radeon_get_clock_info(rdev->ddev); - r = radeon_clocks_init(rdev); - if (r) - return r; /* Fence driver */ r = radeon_fence_driver_init(rdev); if (r) @@ -2663,7 +2652,6 @@ void r600_fini(struct radeon_device *rdev) radeon_agp_fini(rdev); radeon_gem_fini(rdev); radeon_fence_driver_fini(rdev); - radeon_clocks_fini(rdev); radeon_bo_fini(rdev); radeon_atombios_fini(rdev); kfree(rdev->bios); @@ -3541,7 +3529,7 @@ void r600_ioctl_wait_idle(struct radeon_device *rdev, struct radeon_bo *bo) * rather than write to HDP_REG_COHERENCY_FLUSH_CNTL */ if ((rdev->family >= CHIP_RV770) && (rdev->family <= CHIP_RV740)) { - void __iomem *ptr = (void *)rdev->gart.table.vram.ptr; + void __iomem *ptr = (void *)rdev->vram_scratch.ptr; u32 tmp; WREG32(HDP_DEBUG1, 0); diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c index d13622a..9ceb2a1 100644 --- a/drivers/gpu/drm/radeon/r600_blit_kms.c +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c @@ -1,3 +1,28 @@ +/* + * Copyright 2009 Advanced Micro Devices, Inc. + * Copyright 2009 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
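
The r100_gpu_cp_is_lockup() change earlier in this series drops the two-tier 1s/3s heuristic in favor of a single rule: declare a lockup only after ten full seconds with no read-pointer movement. Reduced to its core (a simplified sketch of the function's bookkeeping; the in-tree function additionally guards against jiffies wrap-around):

static bool cp_stalled(struct r100_gpu_lockup *lockup, u32 rptr)
{
	unsigned long elapsed;

	if (rptr != lockup->last_cp_rptr) {
		/* Forward progress: restart the stall clock. */
		lockup->last_cp_rptr = rptr;
		lockup->last_jiffies = jiffies;
		return false;
	}
	elapsed = jiffies_to_msecs(jiffies - lockup->last_jiffies);
	return elapsed >= 10000; /* 10 seconds with no CP progress */
}
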
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + #include "drmP.h" #include "drm.h" #include "radeon_drm.h" diff --git a/drivers/gpu/drm/radeon/r600_blit_shaders.h b/drivers/gpu/drm/radeon/r600_blit_shaders.h index fdc3b37..f437d36 100644 --- a/drivers/gpu/drm/radeon/r600_blit_shaders.h +++ b/drivers/gpu/drm/radeon/r600_blit_shaders.h @@ -1,3 +1,27 @@ +/* + * Copyright 2009 Advanced Micro Devices, Inc. + * Copyright 2009 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ #ifndef R600_BLIT_SHADERS_H #define R600_BLIT_SHADERS_H diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index d886494..250a3a9 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -1170,9 +1170,8 @@ static inline int r600_check_texture_resource(struct radeon_cs_parser *p, u32 i /* using get ib will give us the offset into the mipmap bo */ word0 = radeon_get_ib_value(p, idx + 3) << 8; if ((mipmap_size + word0) > radeon_bo_size(mipmap)) { - dev_warn(p->dev, "mipmap bo too small (%d %d %d %d %d %d -> %d have %ld)\n", - w0, h0, bpe, blevel, nlevels, word0, mipmap_size, radeon_bo_size(texture)); - return -EINVAL; + /*dev_warn(p->dev, "mipmap bo too small (%d %d %d %d %d %d -> %d have %ld)\n", + w0, h0, bpe, blevel, nlevels, word0, mipmap_size, radeon_bo_size(texture));*/ } return 0; } diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 3dfcfa3..a168d64 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1013,6 +1013,11 @@ int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data, int radeon_gem_get_tiling_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +/* VRAM scratch page for HDP bug */ +struct r700_vram_scratch { + struct radeon_bo *robj; + volatile uint32_t *ptr; +}; /* * Core structure, functions and helpers. 
@@ -1079,6 +1084,7 @@ struct radeon_device { const struct firmware *pfp_fw; /* r6/700 PFP firmware */ const struct firmware *rlc_fw; /* r6/700 RLC firmware */ struct r600_blit r600_blit; + struct r700_vram_scratch vram_scratch; int msi_enabled; /* msi enabled */ struct r600_ih ih; /* r6/700 interrupt ring */ struct workqueue_struct *wq; @@ -1333,8 +1339,6 @@ extern bool radeon_card_posted(struct radeon_device *rdev); extern void radeon_update_bandwidth_info(struct radeon_device *rdev); extern void radeon_update_display_priority(struct radeon_device *rdev); extern bool radeon_boot_test_post_card(struct radeon_device *rdev); -extern int radeon_clocks_init(struct radeon_device *rdev); -extern void radeon_clocks_fini(struct radeon_device *rdev); extern void radeon_scratch_init(struct radeon_device *rdev); extern void radeon_surface_init(struct radeon_device *rdev); extern int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data); diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index a21bf88..25e1dd1 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -858,21 +858,3 @@ int radeon_asic_init(struct radeon_device *rdev) return 0; } -/* - * Wrapper around modesetting bits. Move to radeon_clocks.c? - */ -int radeon_clocks_init(struct radeon_device *rdev) -{ - int r; - - r = radeon_static_clocks_init(rdev->ddev); - if (r) { - return r; - } - DRM_INFO("Clocks initialized !\n"); - return 0; -} - -void radeon_clocks_fini(struct radeon_device *rdev) -{ -} diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 6114198..ebae14c 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -85,6 +85,19 @@ static inline struct radeon_i2c_bus_rec radeon_lookup_i2c_gpio(struct radeon_dev for (i = 0; i < num_indices; i++) { gpio = &i2c_info->asGPIO_Info[i]; + /* some evergreen boards have bad data for this entry */ + if (ASIC_IS_DCE4(rdev)) { + if ((i == 7) && + (gpio->usClkMaskRegisterIndex == 0x1936) && + (gpio->sucI2cId.ucAccess == 0)) { + gpio->sucI2cId.ucAccess = 0x97; + gpio->ucDataMaskShift = 8; + gpio->ucDataEnShift = 8; + gpio->ucDataY_Shift = 8; + gpio->ucDataA_Shift = 8; + } + } + if (gpio->sucI2cId.ucAccess == id) { i2c.mask_clk_reg = le16_to_cpu(gpio->usClkMaskRegisterIndex) * 4; i2c.mask_data_reg = le16_to_cpu(gpio->usDataMaskRegisterIndex) * 4; @@ -147,6 +160,20 @@ void radeon_atombios_i2c_init(struct radeon_device *rdev) for (i = 0; i < num_indices; i++) { gpio = &i2c_info->asGPIO_Info[i]; i2c.valid = false; + + /* some evergreen boards have bad data for this entry */ + if (ASIC_IS_DCE4(rdev)) { + if ((i == 7) && + (gpio->usClkMaskRegisterIndex == 0x1936) && + (gpio->sucI2cId.ucAccess == 0)) { + gpio->sucI2cId.ucAccess = 0x97; + gpio->ucDataMaskShift = 8; + gpio->ucDataEnShift = 8; + gpio->ucDataY_Shift = 8; + gpio->ucDataA_Shift = 8; + } + } + i2c.mask_clk_reg = le16_to_cpu(gpio->usClkMaskRegisterIndex) * 4; i2c.mask_data_reg = le16_to_cpu(gpio->usDataMaskRegisterIndex) * 4; i2c.en_clk_reg = le16_to_cpu(gpio->usClkEnRegisterIndex) * 4; diff --git a/drivers/gpu/drm/radeon/radeon_clocks.c b/drivers/gpu/drm/radeon/radeon_clocks.c index 14448a7..5249af8 100644 --- a/drivers/gpu/drm/radeon/radeon_clocks.c +++ b/drivers/gpu/drm/radeon/radeon_clocks.c @@ -327,6 +327,14 @@ void radeon_get_clock_info(struct drm_device *dev) mpll->max_feedback_div = 0xff; mpll->best_vco = 0; + if (!rdev->clock.default_sclk) + 
rdev->clock.default_sclk = radeon_get_engine_clock(rdev); + if ((!rdev->clock.default_mclk) && rdev->asic->get_memory_clock) + rdev->clock.default_mclk = radeon_get_memory_clock(rdev); + + rdev->pm.current_sclk = rdev->clock.default_sclk; + rdev->pm.current_mclk = rdev->clock.default_mclk; + } /* 10 khz */ @@ -897,53 +905,3 @@ void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enable) } } -static void radeon_apply_clock_quirks(struct radeon_device *rdev) -{ - uint32_t tmp; - - /* XXX make sure engine is idle */ - - if (rdev->family < CHIP_RS600) { - tmp = RREG32_PLL(RADEON_SCLK_CNTL); - if (ASIC_IS_R300(rdev) || ASIC_IS_RV100(rdev)) - tmp |= RADEON_SCLK_FORCE_CP | RADEON_SCLK_FORCE_VIP; - if ((rdev->family == CHIP_RV250) - || (rdev->family == CHIP_RV280)) - tmp |= - RADEON_SCLK_FORCE_DISP1 | RADEON_SCLK_FORCE_DISP2; - if ((rdev->family == CHIP_RV350) - || (rdev->family == CHIP_RV380)) - tmp |= R300_SCLK_FORCE_VAP; - if (rdev->family == CHIP_R420) - tmp |= R300_SCLK_FORCE_PX | R300_SCLK_FORCE_TX; - WREG32_PLL(RADEON_SCLK_CNTL, tmp); - } else if (rdev->family < CHIP_R600) { - tmp = RREG32_PLL(AVIVO_CP_DYN_CNTL); - tmp |= AVIVO_CP_FORCEON; - WREG32_PLL(AVIVO_CP_DYN_CNTL, tmp); - - tmp = RREG32_PLL(AVIVO_E2_DYN_CNTL); - tmp |= AVIVO_E2_FORCEON; - WREG32_PLL(AVIVO_E2_DYN_CNTL, tmp); - - tmp = RREG32_PLL(AVIVO_IDCT_DYN_CNTL); - tmp |= AVIVO_IDCT_FORCEON; - WREG32_PLL(AVIVO_IDCT_DYN_CNTL, tmp); - } -} - -int radeon_static_clocks_init(struct drm_device *dev) -{ - struct radeon_device *rdev = dev->dev_private; - - /* XXX make sure engine is idle */ - - if (radeon_dynclks != -1) { - if (radeon_dynclks) { - if (rdev->asic->set_clock_gating) - radeon_set_clock_gating(rdev, 1); - } - } - radeon_apply_clock_quirks(rdev); - return 0; -} diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index bd74e42..a04b7a6 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -1485,6 +1485,11 @@ bool radeon_get_legacy_connector_info_from_table(struct drm_device *dev) /* PowerMac8,1 ? */ /* imac g5 isight */ rdev->mode_info.connector_table = CT_IMAC_G5_ISIGHT; + } else if ((rdev->pdev->device == 0x4a48) && + (rdev->pdev->subsystem_vendor == 0x1002) && + (rdev->pdev->subsystem_device == 0x4a48)) { + /* Mac X800 */ + rdev->mode_info.connector_table = CT_MAC_X800; } else #endif /* CONFIG_PPC_PMAC */ #ifdef CONFIG_PPC64 @@ -1961,6 +1966,48 @@ bool radeon_get_legacy_connector_info_from_table(struct drm_device *dev) CONNECTOR_OBJECT_ID_VGA, &hpd); break; + case CT_MAC_X800: + DRM_INFO("Connector Table: %d (mac x800)\n", + rdev->mode_info.connector_table); + /* DVI - primary dac, internal tmds */ + ddc_i2c = combios_setup_i2c_bus(rdev, DDC_DVI, 0, 0); + hpd.hpd = RADEON_HPD_1; /* ??? */ + radeon_add_legacy_encoder(dev, + radeon_get_encoder_enum(dev, + ATOM_DEVICE_DFP1_SUPPORT, + 0), + ATOM_DEVICE_DFP1_SUPPORT); + radeon_add_legacy_encoder(dev, + radeon_get_encoder_enum(dev, + ATOM_DEVICE_CRT1_SUPPORT, + 1), + ATOM_DEVICE_CRT1_SUPPORT); + radeon_add_legacy_connector(dev, 0, + ATOM_DEVICE_DFP1_SUPPORT | + ATOM_DEVICE_CRT1_SUPPORT, + DRM_MODE_CONNECTOR_DVII, &ddc_i2c, + CONNECTOR_OBJECT_ID_SINGLE_LINK_DVI_I, + &hpd); + /* DVI - tv dac, dvo */ + ddc_i2c = combios_setup_i2c_bus(rdev, DDC_MONID, 0, 0); + hpd.hpd = RADEON_HPD_2; /* ??? 
*/ + radeon_add_legacy_encoder(dev, + radeon_get_encoder_enum(dev, + ATOM_DEVICE_DFP2_SUPPORT, + 0), + ATOM_DEVICE_DFP2_SUPPORT); + radeon_add_legacy_encoder(dev, + radeon_get_encoder_enum(dev, + ATOM_DEVICE_CRT2_SUPPORT, + 2), + ATOM_DEVICE_CRT2_SUPPORT); + radeon_add_legacy_connector(dev, 1, + ATOM_DEVICE_DFP2_SUPPORT | + ATOM_DEVICE_CRT2_SUPPORT, + DRM_MODE_CONNECTOR_DVII, &ddc_i2c, + CONNECTOR_OBJECT_ID_DUAL_LINK_DVI_I, + &hpd); + break; default: DRM_INFO("Connector table: %d (invalid)\n", rdev->mode_info.connector_table); diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 1a5ee39..ecc1a8f 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -481,7 +481,8 @@ static int radeon_lvds_mode_valid(struct drm_connector *connector, return MODE_OK; } -static enum drm_connector_status radeon_lvds_detect(struct drm_connector *connector) +static enum drm_connector_status +radeon_lvds_detect(struct drm_connector *connector, bool force) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct drm_encoder *encoder = radeon_best_single_encoder(connector); @@ -594,7 +595,8 @@ static int radeon_vga_mode_valid(struct drm_connector *connector, return MODE_OK; } -static enum drm_connector_status radeon_vga_detect(struct drm_connector *connector) +static enum drm_connector_status +radeon_vga_detect(struct drm_connector *connector, bool force) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct drm_encoder *encoder; @@ -691,7 +693,8 @@ static int radeon_tv_mode_valid(struct drm_connector *connector, return MODE_OK; } -static enum drm_connector_status radeon_tv_detect(struct drm_connector *connector) +static enum drm_connector_status +radeon_tv_detect(struct drm_connector *connector, bool force) { struct drm_encoder *encoder; struct drm_encoder_helper_funcs *encoder_funcs; @@ -748,7 +751,8 @@ static int radeon_dvi_get_modes(struct drm_connector *connector) * we have to check if this analog encoder is shared with anyone else (TV) * if its shared we have to set the other connector to disconnected. 
*/ -static enum drm_connector_status radeon_dvi_detect(struct drm_connector *connector) +static enum drm_connector_status +radeon_dvi_detect(struct drm_connector *connector, bool force) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct drm_encoder *encoder = NULL; @@ -972,7 +976,8 @@ static int radeon_dp_get_modes(struct drm_connector *connector) return ret; } -static enum drm_connector_status radeon_dp_detect(struct drm_connector *connector) +static enum drm_connector_status +radeon_dp_detect(struct drm_connector *connector, bool force) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); enum drm_connector_status ret = connector_status_disconnected; @@ -1051,10 +1056,16 @@ radeon_add_atom_connector(struct drm_device *dev, uint32_t subpixel_order = SubPixelNone; bool shared_ddc = false; - /* fixme - tv/cv/din */ if (connector_type == DRM_MODE_CONNECTOR_Unknown) return; + /* if the user selected tv=0 don't try to add the connector */ + if (((connector_type == DRM_MODE_CONNECTOR_SVIDEO) || + (connector_type == DRM_MODE_CONNECTOR_Composite) || + (connector_type == DRM_MODE_CONNECTOR_9PinDIN)) && + (radeon_tv == 0)) + return; + /* see if we already added it */ list_for_each_entry(connector, &dev->mode_config.connector_list, head) { radeon_connector = to_radeon_connector(connector); @@ -1209,19 +1220,17 @@ radeon_add_atom_connector(struct drm_device *dev, case DRM_MODE_CONNECTOR_SVIDEO: case DRM_MODE_CONNECTOR_Composite: case DRM_MODE_CONNECTOR_9PinDIN: - if (radeon_tv == 1) { - drm_connector_init(dev, &radeon_connector->base, &radeon_tv_connector_funcs, connector_type); - drm_connector_helper_add(&radeon_connector->base, &radeon_tv_connector_helper_funcs); - radeon_connector->dac_load_detect = true; - drm_connector_attach_property(&radeon_connector->base, - rdev->mode_info.load_detect_property, - 1); - drm_connector_attach_property(&radeon_connector->base, - rdev->mode_info.tv_std_property, - radeon_atombios_get_tv_info(rdev)); - /* no HPD on analog connectors */ - radeon_connector->hpd.hpd = RADEON_HPD_NONE; - } + drm_connector_init(dev, &radeon_connector->base, &radeon_tv_connector_funcs, connector_type); + drm_connector_helper_add(&radeon_connector->base, &radeon_tv_connector_helper_funcs); + radeon_connector->dac_load_detect = true; + drm_connector_attach_property(&radeon_connector->base, + rdev->mode_info.load_detect_property, + 1); + drm_connector_attach_property(&radeon_connector->base, + rdev->mode_info.tv_std_property, + radeon_atombios_get_tv_info(rdev)); + /* no HPD on analog connectors */ + radeon_connector->hpd.hpd = RADEON_HPD_NONE; break; case DRM_MODE_CONNECTOR_LVDS: radeon_dig_connector = kzalloc(sizeof(struct radeon_connector_atom_dig), GFP_KERNEL); @@ -1272,10 +1281,16 @@ radeon_add_legacy_connector(struct drm_device *dev, struct radeon_connector *radeon_connector; uint32_t subpixel_order = SubPixelNone; - /* fixme - tv/cv/din */ if (connector_type == DRM_MODE_CONNECTOR_Unknown) return; + /* if the user selected tv=0 don't try to add the connector */ + if (((connector_type == DRM_MODE_CONNECTOR_SVIDEO) || + (connector_type == DRM_MODE_CONNECTOR_Composite) || + (connector_type == DRM_MODE_CONNECTOR_9PinDIN)) && + (radeon_tv == 0)) + return; + /* see if we already added it */ list_for_each_entry(connector, &dev->mode_config.connector_list, head) { radeon_connector = to_radeon_connector(connector); @@ -1347,26 +1362,24 @@ radeon_add_legacy_connector, case DRM_MODE_CONNECTOR_SVIDEO: case 
DRM_MODE_CONNECTOR_Composite: case DRM_MODE_CONNECTOR_9PinDIN: - if (radeon_tv == 1) { - drm_connector_init(dev, &radeon_connector->base, &radeon_tv_connector_funcs, connector_type); - drm_connector_helper_add(&radeon_connector->base, &radeon_tv_connector_helper_funcs); - radeon_connector->dac_load_detect = true; - /* RS400,RC410,RS480 chipset seems to report a lot - * of false positive on load detect, we haven't yet - * found a way to make load detect reliable on those - * chipset, thus just disable it for TV. - */ - if (rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480) - radeon_connector->dac_load_detect = false; - drm_connector_attach_property(&radeon_connector->base, - rdev->mode_info.load_detect_property, - radeon_connector->dac_load_detect); - drm_connector_attach_property(&radeon_connector->base, - rdev->mode_info.tv_std_property, - radeon_combios_get_tv_info(rdev)); - /* no HPD on analog connectors */ - radeon_connector->hpd.hpd = RADEON_HPD_NONE; - } + drm_connector_init(dev, &radeon_connector->base, &radeon_tv_connector_funcs, connector_type); + drm_connector_helper_add(&radeon_connector->base, &radeon_tv_connector_helper_funcs); + radeon_connector->dac_load_detect = true; + /* RS400,RC410,RS480 chipsets seem to report a lot + * of false positives on load detect; we haven't yet + * found a way to make load detect reliable on those + * chipsets, thus just disable it for TV. + */ + if (rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480) + radeon_connector->dac_load_detect = false; + drm_connector_attach_property(&radeon_connector->base, + rdev->mode_info.load_detect_property, + radeon_connector->dac_load_detect); + drm_connector_attach_property(&radeon_connector->base, + rdev->mode_info.tv_std_property, + radeon_combios_get_tv_info(rdev)); + /* no HPD on analog connectors */ + radeon_connector->hpd.hpd = RADEON_HPD_NONE; break; case DRM_MODE_CONNECTOR_LVDS: drm_connector_init(dev, &radeon_connector->base, &radeon_lvds_connector_funcs, connector_type); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 69b3c22..256d204 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -293,30 +293,20 @@ bool radeon_card_posted(struct radeon_device *rdev) void radeon_update_bandwidth_info(struct radeon_device *rdev) { fixed20_12 a; - u32 sclk, mclk; + u32 sclk = rdev->pm.current_sclk; + u32 mclk = rdev->pm.current_mclk; - if (rdev->flags & RADEON_IS_IGP) { - sclk = radeon_get_engine_clock(rdev); - mclk = rdev->clock.default_mclk; - - a.full = dfixed_const(100); - rdev->pm.sclk.full = dfixed_const(sclk); - rdev->pm.sclk.full = dfixed_div(rdev->pm.sclk, a); - rdev->pm.mclk.full = dfixed_const(mclk); - rdev->pm.mclk.full = dfixed_div(rdev->pm.mclk, a); + /* sclk/mclk in MHz */ + a.full = dfixed_const(100); + rdev->pm.sclk.full = dfixed_const(sclk); + rdev->pm.sclk.full = dfixed_div(rdev->pm.sclk, a); + rdev->pm.mclk.full = dfixed_const(mclk); + rdev->pm.mclk.full = dfixed_div(rdev->pm.mclk, a); + if (rdev->flags & RADEON_IS_IGP) { a.full = dfixed_const(16); /* core_bandwidth = sclk(MHz) * 16 */ rdev->pm.core_bandwidth.full = dfixed_div(rdev->pm.sclk, a); - } else { - sclk = radeon_get_engine_clock(rdev); - mclk = radeon_get_memory_clock(rdev); - - a.full = dfixed_const(100); - rdev->pm.sclk.full = dfixed_const(sclk); - rdev->pm.sclk.full = dfixed_div(rdev->pm.sclk, a); - rdev->pm.mclk.full = dfixed_const(mclk); - rdev->pm.mclk.full = dfixed_div(rdev->pm.mclk, a); } } diff --git 
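
radeon_update_bandwidth_info() above now runs the cached pm.current_sclk/pm.current_mclk through one common conversion for IGP and discrete parts alike. The clock fields are stored in units of 10 kHz, so dividing by 100 with the fixed20_12 helpers yields MHz in 20.12 fixed point. A small illustration with a hypothetical 680 MHz engine clock:

	fixed20_12 a, freq;
	u32 sclk = 68000;                  /* 680 MHz, in 10 kHz units */

	a.full = dfixed_const(100);
	freq.full = dfixed_const(sclk);    /* 68000.0 in 20.12 fixed point */
	freq.full = dfixed_div(freq, a);   /* 680.0, i.e. MHz */
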
a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 6dd434a..127a395 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -1140,17 +1140,18 @@ bool radeon_crtc_scaling_mode_fixup(struct drm_crtc *crtc, radeon_crtc->rmx_type = radeon_encoder->rmx_type; else radeon_crtc->rmx_type = RMX_OFF; - src_v = crtc->mode.vdisplay; - dst_v = radeon_crtc->native_mode.vdisplay; - src_h = crtc->mode.hdisplay; - dst_h = radeon_crtc->native_mode.vdisplay; /* copy native mode */ memcpy(&radeon_crtc->native_mode, &radeon_encoder->native_mode, sizeof(struct drm_display_mode)); + src_v = crtc->mode.vdisplay; + dst_v = radeon_crtc->native_mode.vdisplay; + src_h = crtc->mode.hdisplay; + dst_h = radeon_crtc->native_mode.hdisplay; /* fix up for overscan on hdmi */ if (ASIC_IS_AVIVO(rdev) && + (!(mode->flags & DRM_MODE_FLAG_INTERLACE)) && ((radeon_encoder->underscan_type == UNDERSCAN_ON) || ((radeon_encoder->underscan_type == UNDERSCAN_AUTO) && drm_detect_hdmi_monitor(radeon_connector->edid) && diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c index 0416804..6a13ee3 100644 --- a/drivers/gpu/drm/radeon/radeon_i2c.c +++ b/drivers/gpu/drm/radeon/radeon_i2c.c @@ -213,7 +213,7 @@ static void post_xfer(struct i2c_adapter *i2c_adap) static u32 radeon_get_i2c_prescale(struct radeon_device *rdev) { - u32 sclk = radeon_get_engine_clock(rdev); + u32 sclk = rdev->pm.current_sclk; u32 prescale = 0; u32 nm; u8 n, m, loop; diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 8f93e2b..17a6602 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -204,7 +204,7 @@ struct radeon_i2c_chan { /* mostly for macs, but really any system without connector tables */ enum radeon_connector_table { - CT_NONE, + CT_NONE = 0, CT_GENERIC, CT_IBOOK, CT_POWERBOOK_EXTERNAL, @@ -215,6 +215,7 @@ enum radeon_connector_table { CT_IMAC_G5_ISIGHT, CT_EMAC, CT_RN50_POWER, + CT_MAC_X800, }; enum radeon_dvo_chip { @@ -600,7 +601,6 @@ extern bool radeon_get_atom_connector_info_from_supported_devices_table(struct d void radeon_enc_destroy(struct drm_encoder *encoder); void radeon_copy_fb(struct drm_device *dev, struct drm_gem_object *dst_obj); void radeon_combios_asic_init(struct drm_device *dev); -extern int radeon_static_clocks_init(struct drm_device *dev); bool radeon_crtc_scaling_mode_fixup(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode); diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index f1c79681..bfa59db 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -905,6 +905,54 @@ static void rv770_gpu_init(struct radeon_device *rdev) } +static int rv770_vram_scratch_init(struct radeon_device *rdev) +{ + int r; + u64 gpu_addr; + + if (rdev->vram_scratch.robj == NULL) { + r = radeon_bo_create(rdev, NULL, RADEON_GPU_PAGE_SIZE, + true, RADEON_GEM_DOMAIN_VRAM, + &rdev->vram_scratch.robj); + if (r) { + return r; + } + } + + r = radeon_bo_reserve(rdev->vram_scratch.robj, false); + if (unlikely(r != 0)) + return r; + r = radeon_bo_pin(rdev->vram_scratch.robj, + RADEON_GEM_DOMAIN_VRAM, &gpu_addr); + if (r) { + radeon_bo_unreserve(rdev->vram_scratch.robj); + return r; + } + r = radeon_bo_kmap(rdev->vram_scratch.robj, + (void **)&rdev->vram_scratch.ptr); + if (r) + radeon_bo_unpin(rdev->vram_scratch.robj); + radeon_bo_unreserve(rdev->vram_scratch.robj); + + 
return r; +} + +static void rv770_vram_scratch_fini(struct radeon_device *rdev) +{ + int r; + + if (rdev->vram_scratch.robj == NULL) { + return; + } + r = radeon_bo_reserve(rdev->vram_scratch.robj, false); + if (likely(r == 0)) { + radeon_bo_kunmap(rdev->vram_scratch.robj); + radeon_bo_unpin(rdev->vram_scratch.robj); + radeon_bo_unreserve(rdev->vram_scratch.robj); + } + radeon_bo_unref(&rdev->vram_scratch.robj); +} + int rv770_mc_init(struct radeon_device *rdev) { u32 tmp; @@ -970,6 +1018,9 @@ static int rv770_startup(struct radeon_device *rdev) if (r) return r; } + r = rv770_vram_scratch_init(rdev); + if (r) + return r; rv770_gpu_init(rdev); r = r600_blit_init(rdev); if (r) { @@ -1023,11 +1074,6 @@ int rv770_resume(struct radeon_device *rdev) */ /* post card */ atom_asic_init(rdev->mode_info.atom_context); - /* Initialize clocks */ - r = radeon_clocks_init(rdev); - if (r) { - return r; - } r = rv770_startup(rdev); if (r) { @@ -1118,9 +1164,6 @@ int rv770_init(struct radeon_device *rdev) radeon_surface_init(rdev); /* Initialize clocks */ radeon_get_clock_info(rdev->ddev); - r = radeon_clocks_init(rdev); - if (r) - return r; /* Fence driver */ r = radeon_fence_driver_init(rdev); if (r) @@ -1195,9 +1238,9 @@ void rv770_fini(struct radeon_device *rdev) r600_irq_fini(rdev); radeon_irq_kms_fini(rdev); rv770_pcie_gart_fini(rdev); + rv770_vram_scratch_fini(rdev); radeon_gem_fini(rdev); radeon_fence_driver_fini(rdev); - radeon_clocks_fini(rdev); radeon_agp_fini(rdev); radeon_bo_fini(rdev); radeon_atombios_fini(rdev); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index 2ff5cf7..7083b1a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -335,7 +335,8 @@ static void vmw_ldu_connector_restore(struct drm_connector *connector) } static enum drm_connector_status - vmw_ldu_connector_detect(struct drm_connector *connector) + vmw_ldu_connector_detect(struct drm_connector *connector, + bool force) { if (vmw_connector_to_ldu(connector)->pref_active) return connector_status_connected; @@ -516,7 +517,7 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit) drm_connector_init(dev, connector, &vmw_legacy_connector_funcs, DRM_MODE_CONNECTOR_LVDS); - connector->status = vmw_ldu_connector_detect(connector); + connector->status = vmw_ldu_connector_detect(connector, true); drm_encoder_init(dev, encoder, &vmw_legacy_encoder_funcs, DRM_MODE_ENCODER_LVDS); @@ -610,7 +611,7 @@ int vmw_kms_ldu_update_layout(struct vmw_private *dev_priv, unsigned num, ldu->pref_height = 600; ldu->pref_active = false; } - con->status = vmw_ldu_connector_detect(con); + con->status = vmw_ldu_connector_detect(con, true); } mutex_unlock(&dev->mode_config.mutex); diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 0c52899..3f72924 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1285,8 +1285,11 @@ static const struct hid_device_id hid_blacklist[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ASUS, USB_DEVICE_ID_ASUS_T91MT) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ASUS, USB_DEVICE_ID_ASUSTEK_MULTITOUCH_YFO) }, { HID_USB_DEVICE(USB_VENDOR_ID_BELKIN, USB_DEVICE_ID_FLIP_KVM) }, { HID_USB_DEVICE(USB_VENDOR_ID_BTC, USB_DEVICE_ID_BTC_EMPREX_REMOTE) }, + { HID_USB_DEVICE(USB_VENDOR_ID_BTC, 
USB_DEVICE_ID_BTC_EMPREX_REMOTE_2) }, { HID_USB_DEVICE(USB_VENDOR_ID_CANDO, USB_DEVICE_ID_CANDO_MULTI_TOUCH) }, { HID_USB_DEVICE(USB_VENDOR_ID_CANDO, USB_DEVICE_ID_CANDO_MULTI_TOUCH_11_6) }, { HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_CYMOTION) }, @@ -1578,7 +1581,6 @@ static const struct hid_device_id hid_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_AIPTEK, USB_DEVICE_ID_AIPTEK_24) }, { HID_USB_DEVICE(USB_VENDOR_ID_AIRCABLE, USB_DEVICE_ID_AIRCABLE1) }, { HID_USB_DEVICE(USB_VENDOR_ID_ALCOR, USB_DEVICE_ID_ALCOR_USBRS232) }, - { HID_USB_DEVICE(USB_VENDOR_ID_ASUS, USB_DEVICE_ID_ASUS_T91MT)}, { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_LCM)}, { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_LCM2)}, { HID_USB_DEVICE(USB_VENDOR_ID_AVERMEDIA, USB_DEVICE_ID_AVER_FM_MR800) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 85c6d13..765a4f5 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -105,6 +105,7 @@ #define USB_VENDOR_ID_ASUS 0x0486 #define USB_DEVICE_ID_ASUS_T91MT 0x0185 +#define USB_DEVICE_ID_ASUSTEK_MULTITOUCH_YFO 0x0186 #define USB_VENDOR_ID_ASUSTEK 0x0b05 #define USB_DEVICE_ID_ASUSTEK_LCM 0x1726 @@ -128,6 +129,7 @@ #define USB_VENDOR_ID_BTC 0x046e #define USB_DEVICE_ID_BTC_EMPREX_REMOTE 0x5578 +#define USB_DEVICE_ID_BTC_EMPREX_REMOTE_2 0x5577 #define USB_VENDOR_ID_CANDO 0x2087 #define USB_DEVICE_ID_CANDO_MULTI_TOUCH 0x0a01 @@ -149,6 +151,7 @@ #define USB_VENDOR_ID_CHICONY 0x04f2 #define USB_DEVICE_ID_CHICONY_TACTICAL_PAD 0x0418 +#define USB_DEVICE_ID_CHICONY_MULTI_TOUCH 0xb19d #define USB_VENDOR_ID_CIDC 0x1677 @@ -507,6 +510,7 @@ #define USB_VENDOR_ID_UCLOGIC 0x5543 #define USB_DEVICE_ID_UCLOGIC_TABLET_PF1209 0x0042 #define USB_DEVICE_ID_UCLOGIC_TABLET_WP4030U 0x0003 +#define USB_DEVICE_ID_UCLOGIC_TABLET_KNA5 0x6001 #define USB_VENDOR_ID_VERNIER 0x08f7 #define USB_DEVICE_ID_VERNIER_LABPRO 0x0001 diff --git a/drivers/hid/hid-mosart.c b/drivers/hid/hid-mosart.c index e91437c..ac5421d 100644 --- a/drivers/hid/hid-mosart.c +++ b/drivers/hid/hid-mosart.c @@ -239,6 +239,7 @@ static void mosart_remove(struct hid_device *hdev) static const struct hid_device_id mosart_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ASUS, USB_DEVICE_ID_ASUS_T91MT) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ASUS, USB_DEVICE_ID_ASUSTEK_MULTITOUCH_YFO) }, { } }; MODULE_DEVICE_TABLE(hid, mosart_devices); diff --git a/drivers/hid/hid-topseed.c b/drivers/hid/hid-topseed.c index 5771f85..956ed9a 100644 --- a/drivers/hid/hid-topseed.c +++ b/drivers/hid/hid-topseed.c @@ -64,6 +64,7 @@ static int ts_input_mapping(struct hid_device *hdev, struct hid_input *hi, static const struct hid_device_id ts_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_TOPSEED, USB_DEVICE_ID_TOPSEED_CYBERLINK) }, { HID_USB_DEVICE(USB_VENDOR_ID_BTC, USB_DEVICE_ID_BTC_EMPREX_REMOTE) }, + { HID_USB_DEVICE(USB_VENDOR_ID_BTC, USB_DEVICE_ID_BTC_EMPREX_REMOTE_2) }, { HID_USB_DEVICE(USB_VENDOR_ID_TOPSEED2, USB_DEVICE_ID_TOPSEED2_RF_COMBO) }, { } }; diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index b729c02..599041a 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -828,6 +828,7 @@ static int usbhid_output_raw_report(struct hid_device *hid, __u8 *buf, size_t co } } else { int skipped_report_id = 0; + int report_id = buf[0]; if (buf[0] == 0x0) { /* Don't send the Report ID */ buf++; @@ -837,7 +838,7 @@ static int usbhid_output_raw_report(struct hid_device *hid, __u8 *buf, size_t co ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 
0), HID_REQ_SET_REPORT, USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE, - ((report_type + 1) << 8) | *buf, + ((report_type + 1) << 8) | report_id, interface->desc.bInterfaceNumber, buf, count, USB_CTRL_SET_TIMEOUT); /* count also the report id, if this was a numbered report. */ @@ -1445,6 +1446,11 @@ static const struct hid_device_id hid_usb_table[] = { { } }; +struct usb_interface *usbhid_find_interface(int minor) +{ + return usb_find_interface(&hid_driver, minor); +} + static struct hid_driver hid_usb_driver = { .name = "generic-usb", .id_table = hid_usb_table, diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 2643d31..70da318 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -33,6 +33,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_AASHIMA, USB_DEVICE_ID_AASHIMA_PREDATOR, HID_QUIRK_BADPAD }, { USB_VENDOR_ID_ALPS, USB_DEVICE_ID_IBM_GAMEPAD, HID_QUIRK_BADPAD }, { USB_VENDOR_ID_CHIC, USB_DEVICE_ID_CHIC_GAMEPAD, HID_QUIRK_BADPAD }, + { USB_VENDOR_ID_DWAV, USB_DEVICE_ID_EGALAX_TOUCHCONTROLLER, HID_QUIRK_MULTI_INPUT | HID_QUIRK_NOGET }, { USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_MOJO, USB_DEVICE_ID_RETRO_ADAPTER, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_HAPP, USB_DEVICE_ID_UGCI_DRIVING, HID_QUIRK_BADPAD | HID_QUIRK_MULTI_INPUT }, @@ -69,6 +70,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_TURBOX, USB_DEVICE_ID_TURBOX_KEYBOARD, HID_QUIRK_NOGET }, { USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_PF1209, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_WP4030U, HID_QUIRK_MULTI_INPUT }, + { USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_KNA5, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_DUAL_USB_JOYPAD, HID_QUIRK_NOGET | HID_QUIRK_MULTI_INPUT | HID_QUIRK_SKIP_OUTPUT_REPORTS }, { USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_QUAD_USB_JOYPAD, HID_QUIRK_NOGET | HID_QUIRK_MULTI_INPUT }, @@ -77,6 +79,8 @@ static const struct hid_blacklist { { USB_VENDOR_ID_PI_ENGINEERING, USB_DEVICE_ID_PI_ENGINEERING_VEC_USB_FOOTPEDAL, HID_QUIRK_HIDINPUT_FORCE }, + { USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_MULTI_TOUCH, HID_QUIRK_MULTI_INPUT }, + { 0, 0 } }; diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index 0a29c51..681e620 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -270,7 +270,7 @@ static int hiddev_open(struct inode *inode, struct file *file) struct hiddev *hiddev; int res; - intf = usb_find_interface(&hiddev_driver, iminor(inode)); + intf = usbhid_find_interface(iminor(inode)); if (!intf) return -ENODEV; hid = usb_get_intfdata(intf); diff --git a/drivers/hid/usbhid/usbhid.h b/drivers/hid/usbhid/usbhid.h index 693fd3e..89d2e84 100644 --- a/drivers/hid/usbhid/usbhid.h +++ b/drivers/hid/usbhid/usbhid.h @@ -42,6 +42,7 @@ void usbhid_submit_report (struct hid_device *hid, struct hid_report *report, unsigned char dir); int usbhid_get_power(struct hid_device *hid); void usbhid_put_power(struct hid_device *hid); +struct usb_interface *usbhid_find_interface(int minor); /* iofl flags */ #define HID_CTRL_RUNNING 1 diff --git a/drivers/hwmon/adm1031.c b/drivers/hwmon/adm1031.c index 15c1a96..0683e6b 100644 --- a/drivers/hwmon/adm1031.c +++ b/drivers/hwmon/adm1031.c @@ -79,7 +79,7 @@ struct adm1031_data { int chip_type; char valid; /* !=0 if following fields are valid */ unsigned long last_updated; /* In jiffies */ - unsigned int update_rate; /* In milliseconds 
*/ + unsigned int update_interval; /* In milliseconds */ /* The chan_select_table contains the possible configurations for * auto fan control. */ @@ -743,23 +743,23 @@ static SENSOR_DEVICE_ATTR(temp3_crit_alarm, S_IRUGO, show_alarm, NULL, 12); static SENSOR_DEVICE_ATTR(temp3_fault, S_IRUGO, show_alarm, NULL, 13); static SENSOR_DEVICE_ATTR(temp1_crit_alarm, S_IRUGO, show_alarm, NULL, 14); -/* Update Rate */ -static const unsigned int update_rates[] = { +/* Update Interval */ +static const unsigned int update_intervals[] = { 16000, 8000, 4000, 2000, 1000, 500, 250, 125, }; -static ssize_t show_update_rate(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t show_update_interval(struct device *dev, + struct device_attribute *attr, char *buf) { struct i2c_client *client = to_i2c_client(dev); struct adm1031_data *data = i2c_get_clientdata(client); - return sprintf(buf, "%u\n", data->update_rate); + return sprintf(buf, "%u\n", data->update_interval); } -static ssize_t set_update_rate(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +static ssize_t set_update_interval(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { struct i2c_client *client = to_i2c_client(dev); struct adm1031_data *data = i2c_get_clientdata(client); @@ -771,12 +771,15 @@ static ssize_t set_update_rate(struct device *dev, if (err) return err; - /* find the nearest update rate from the table */ - for (i = 0; i < ARRAY_SIZE(update_rates) - 1; i++) { - if (val >= update_rates[i]) + /* + * Find the nearest update interval from the table. + * Use it to determine the matching update rate. + */ + for (i = 0; i < ARRAY_SIZE(update_intervals) - 1; i++) { + if (val >= update_intervals[i]) break; } - /* if not found, we point to the last entry (lowest update rate) */ + /* if not found, we point to the last entry (lowest update interval) */ /* set the new update rate while preserving other settings */ reg = adm1031_read_value(client, ADM1031_REG_FAN_FILTER); @@ -785,14 +788,14 @@ static ssize_t set_update_rate(struct device *dev, adm1031_write_value(client, ADM1031_REG_FAN_FILTER, reg); mutex_lock(&data->update_lock); - data->update_rate = update_rates[i]; + data->update_interval = update_intervals[i]; mutex_unlock(&data->update_lock); return count; } -static DEVICE_ATTR(update_rate, S_IRUGO | S_IWUSR, show_update_rate, - set_update_rate); +static DEVICE_ATTR(update_interval, S_IRUGO | S_IWUSR, show_update_interval, + set_update_interval); static struct attribute *adm1031_attributes[] = { &sensor_dev_attr_fan1_input.dev_attr.attr, @@ -830,7 +833,7 @@ static struct attribute *adm1031_attributes[] = { &sensor_dev_attr_auto_fan1_min_pwm.dev_attr.attr, - &dev_attr_update_rate.attr, + &dev_attr_update_interval.attr, &dev_attr_alarms.attr, NULL @@ -981,7 +984,8 @@ static void adm1031_init_client(struct i2c_client *client) mask = ADM1031_UPDATE_RATE_MASK; read_val = adm1031_read_value(client, ADM1031_REG_FAN_FILTER); i = (read_val & mask) >> ADM1031_UPDATE_RATE_SHIFT; - data->update_rate = update_rates[i]; + /* Save it as update interval */ + data->update_interval = update_intervals[i]; } static struct adm1031_data *adm1031_update_device(struct device *dev) @@ -993,7 +997,8 @@ static struct adm1031_data *adm1031_update_device(struct device *dev) mutex_lock(&data->update_lock); - next_update = data->last_updated + msecs_to_jiffies(data->update_rate); + next_update = data->last_updated + + msecs_to_jiffies(data->update_interval); if 
(time_after(jiffies, next_update) || !data->valid) { dev_dbg(&client->dev, "Starting adm1031 update\n"); diff --git a/drivers/hwmon/emc1403.c b/drivers/hwmon/emc1403.c index 5b58b20..8dee3f3 100644 --- a/drivers/hwmon/emc1403.c +++ b/drivers/hwmon/emc1403.c @@ -308,7 +308,6 @@ static int emc1403_probe(struct i2c_client *client, res = sysfs_create_group(&client->dev.kobj, &m_thermal_gr); if (res) { dev_warn(&client->dev, "create group failed\n"); - hwmon_device_unregister(data->hwmon_dev); goto thermal_error1; } data->hwmon_dev = hwmon_device_register(&client->dev); diff --git a/drivers/hwmon/f75375s.c b/drivers/hwmon/f75375s.c index 0f58ecc..9638d58 100644 --- a/drivers/hwmon/f75375s.c +++ b/drivers/hwmon/f75375s.c @@ -79,7 +79,7 @@ enum chips { f75373, f75375 }; #define F75375_REG_PWM2_DROP_DUTY 0x6C #define FAN_CTRL_LINEAR(nr) (4 + nr) -#define FAN_CTRL_MODE(nr) (5 + ((nr) * 2)) +#define FAN_CTRL_MODE(nr) (4 + ((nr) * 2)) /* * Data structures and manipulation thereof @@ -298,7 +298,7 @@ static int set_pwm_enable_direct(struct i2c_client *client, int nr, int val) return -EINVAL; fanmode = f75375_read8(client, F75375_REG_FAN_TIMER); - fanmode = ~(3 << FAN_CTRL_MODE(nr)); + fanmode &= ~(3 << FAN_CTRL_MODE(nr)); switch (val) { case 0: /* Full speed */ @@ -350,7 +350,7 @@ static ssize_t set_pwm_mode(struct device *dev, struct device_attribute *attr, mutex_lock(&data->update_lock); conf = f75375_read8(client, F75375_REG_CONFIG1); - conf = ~(1 << FAN_CTRL_LINEAR(nr)); + conf &= ~(1 << FAN_CTRL_LINEAR(nr)); if (val == 0) conf |= (1 << FAN_CTRL_LINEAR(nr)) ; diff --git a/drivers/hwmon/hp_accel.c b/drivers/hwmon/hp_accel.c index 7580f55..36e9575 100644 --- a/drivers/hwmon/hp_accel.c +++ b/drivers/hwmon/hp_accel.c @@ -221,6 +221,8 @@ static struct dmi_system_id lis3lv02d_dmi_ids[] = { AXIS_DMI_MATCH("HPB442x", "HP ProBook 442", xy_rotated_left), AXIS_DMI_MATCH("HPB452x", "HP ProBook 452", y_inverted), AXIS_DMI_MATCH("HPB522x", "HP ProBook 522", xy_swap), + AXIS_DMI_MATCH("HPB532x", "HP ProBook 532", y_inverted), + AXIS_DMI_MATCH("Mini5102", "HP Mini 5102", xy_rotated_left_usd), { NULL, } /* Laptop models without axis info (yet): * "NC6910" "HP Compaq 6910" diff --git a/drivers/hwmon/lis3lv02d_i2c.c b/drivers/hwmon/lis3lv02d_i2c.c index dc1f540..8e5933b 100644 --- a/drivers/hwmon/lis3lv02d_i2c.c +++ b/drivers/hwmon/lis3lv02d_i2c.c @@ -121,7 +121,7 @@ static int lis3lv02d_i2c_suspend(struct i2c_client *client, pm_message_t mesg) { struct lis3lv02d *lis3 = i2c_get_clientdata(client); - if (!lis3->pdata->wakeup_flags) + if (!lis3->pdata || !lis3->pdata->wakeup_flags) lis3lv02d_poweroff(lis3); return 0; } @@ -130,7 +130,7 @@ static int lis3lv02d_i2c_resume(struct i2c_client *client) { struct lis3lv02d *lis3 = i2c_get_clientdata(client); - if (!lis3->pdata->wakeup_flags) + if (!lis3->pdata || !lis3->pdata->wakeup_flags) lis3lv02d_poweron(lis3); return 0; } diff --git a/drivers/hwmon/lis3lv02d_spi.c b/drivers/hwmon/lis3lv02d_spi.c index 82b1680..b9be5e3 100644 --- a/drivers/hwmon/lis3lv02d_spi.c +++ b/drivers/hwmon/lis3lv02d_spi.c @@ -92,7 +92,7 @@ static int lis3lv02d_spi_suspend(struct spi_device *spi, pm_message_t mesg) { struct lis3lv02d *lis3 = spi_get_drvdata(spi); - if (!lis3->pdata->wakeup_flags) + if (!lis3->pdata || !lis3->pdata->wakeup_flags) lis3lv02d_poweroff(&lis3_dev); return 0; @@ -102,7 +102,7 @@ static int lis3lv02d_spi_resume(struct spi_device *spi) { struct lis3lv02d *lis3 = spi_get_drvdata(spi); - if (!lis3->pdata->wakeup_flags) + if (!lis3->pdata || !lis3->pdata->wakeup_flags) 
lis3lv02d_poweron(lis3); return 0; diff --git a/drivers/hwmon/lm95241.c b/drivers/hwmon/lm95241.c index 94741d4..464340f 100644 --- a/drivers/hwmon/lm95241.c +++ b/drivers/hwmon/lm95241.c @@ -91,7 +91,7 @@ static struct lm95241_data *lm95241_update_device(struct device *dev); struct lm95241_data { struct device *hwmon_dev; struct mutex update_lock; - unsigned long last_updated, rate; /* in jiffies */ + unsigned long last_updated, interval; /* in jiffies */ char valid; /* zero until following fields are valid */ /* registers values */ u8 local_h, local_l; /* local */ @@ -114,23 +114,23 @@ show_temp(local); show_temp(remote1); show_temp(remote2); -static ssize_t show_rate(struct device *dev, struct device_attribute *attr, +static ssize_t show_interval(struct device *dev, struct device_attribute *attr, char *buf) { struct lm95241_data *data = lm95241_update_device(dev); - snprintf(buf, PAGE_SIZE - 1, "%lu\n", 1000 * data->rate / HZ); + snprintf(buf, PAGE_SIZE - 1, "%lu\n", 1000 * data->interval / HZ); return strlen(buf); } -static ssize_t set_rate(struct device *dev, struct device_attribute *attr, +static ssize_t set_interval(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct i2c_client *client = to_i2c_client(dev); struct lm95241_data *data = i2c_get_clientdata(client); - strict_strtol(buf, 10, &data->rate); - data->rate = data->rate * HZ / 1000; + strict_strtol(buf, 10, &data->interval); + data->interval = data->interval * HZ / 1000; return count; } @@ -286,7 +286,8 @@ static DEVICE_ATTR(temp2_min, S_IWUSR | S_IRUGO, show_min1, set_min1); static DEVICE_ATTR(temp3_min, S_IWUSR | S_IRUGO, show_min2, set_min2); static DEVICE_ATTR(temp2_max, S_IWUSR | S_IRUGO, show_max1, set_max1); static DEVICE_ATTR(temp3_max, S_IWUSR | S_IRUGO, show_max2, set_max2); -static DEVICE_ATTR(rate, S_IWUSR | S_IRUGO, show_rate, set_rate); +static DEVICE_ATTR(update_interval, S_IWUSR | S_IRUGO, show_interval, + set_interval); static struct attribute *lm95241_attributes[] = { &dev_attr_temp1_input.attr, @@ -298,7 +299,7 @@ static struct attribute *lm95241_attributes[] = { &dev_attr_temp3_min.attr, &dev_attr_temp2_max.attr, &dev_attr_temp3_max.attr, - &dev_attr_rate.attr, + &dev_attr_update_interval.attr, NULL }; @@ -376,7 +377,7 @@ static void lm95241_init_client(struct i2c_client *client) { struct lm95241_data *data = i2c_get_clientdata(client); - data->rate = HZ; /* 1 sec default */ + data->interval = HZ; /* 1 sec default */ data->valid = 0; data->config = CFG_CR0076; data->model = 0; @@ -410,7 +411,7 @@ static struct lm95241_data *lm95241_update_device(struct device *dev) mutex_lock(&data->update_lock); - if (time_after(jiffies, data->last_updated + data->rate) || + if (time_after(jiffies, data->last_updated + data->interval) || !data->valid) { dev_dbg(&client->dev, "Updating lm95241 data.\n"); data->local_h = diff --git a/drivers/hwmon/w83627ehf.c b/drivers/hwmon/w83627ehf.c index e96e69d..072c580 100644 --- a/drivers/hwmon/w83627ehf.c +++ b/drivers/hwmon/w83627ehf.c @@ -127,6 +127,7 @@ superio_enter(int ioreg) static inline void superio_exit(int ioreg) { + outb(0xaa, ioreg); outb(0x02, ioreg); outb(0x02, ioreg + 1); } diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 7433e07..7c5b01c 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -516,10 +516,10 @@ static int ide_do_setfeature(ide_drive_t *drive, u8 feature, u8 nsect) return ide_no_data_taskfile(drive, &cmd); } -static void update_ordered(ide_drive_t *drive) +static void 
update_flush(ide_drive_t *drive) { u16 *id = drive->id; - unsigned ordered = QUEUE_ORDERED_NONE; + unsigned flush = 0; if (drive->dev_flags & IDE_DFLAG_WCACHE) { unsigned long long capacity; @@ -543,13 +543,12 @@ static void update_ordered(ide_drive_t *drive) drive->name, barrier ? "" : "not "); if (barrier) { - ordered = QUEUE_ORDERED_DRAIN_FLUSH; + flush = REQ_FLUSH; blk_queue_prep_rq(drive->queue, idedisk_prep_fn); } - } else - ordered = QUEUE_ORDERED_DRAIN; + } - blk_queue_ordered(drive->queue, ordered); + blk_queue_flush(drive->queue, flush); } ide_devset_get_flag(wcache, IDE_DFLAG_WCACHE); @@ -572,7 +571,7 @@ static int set_wcache(ide_drive_t *drive, int arg) } } - update_ordered(drive); + update_flush(drive); return err; } diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index a381be8..999dac0 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -441,19 +441,6 @@ void do_ide_request(struct request_queue *q) struct request *rq = NULL; ide_startstop_t startstop; - /* - * drive is doing pre-flush, ordered write, post-flush sequence. even - * though that is 3 requests, it must be seen as a single transaction. - * we must not preempt this drive until that is complete - */ - if (blk_queue_flushing(q)) - /* - * small race where queue could get replugged during - * the 3-request flush cycle, just yank the plug since - * we want it to finish asap - */ - blk_remove_plug(q); - spin_unlock_irq(q->queue_lock); /* HLD do_request() callback might sleep, make sure it's okay */ diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 4c3d1bf..068cef0 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -1448,19 +1448,13 @@ int ide_host_register(struct ide_host *host, const struct ide_port_info *d, if (hwif == NULL) continue; - if (hwif->present) - hwif_register_devices(hwif); - } - - ide_host_for_each_port(i, hwif, host) { - if (hwif == NULL) - continue; - ide_sysfs_register_port(hwif); ide_proc_register_port(hwif); - if (hwif->present) + if (hwif->present) { ide_proc_port_register_devices(hwif); + hwif_register_devices(hwif); + } } return j ? 
0 : -1; diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h index 8f0caf7..78fbe9f 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.h +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h @@ -53,7 +53,7 @@ #define T3_MAX_PBL_SIZE 256 #define T3_MAX_RQ_SIZE 1024 #define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1) -#define T3_MAX_CQ_DEPTH 262144 +#define T3_MAX_CQ_DEPTH 65536 #define T3_MAX_NUM_STAG (1<<15) #define T3_MAX_MR_SIZE 0x100000000ULL #define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */ diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 443cea5..61e0efd 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -502,7 +502,9 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, static void nes_retrans_expired(struct nes_cm_node *cm_node) { struct iw_cm_id *cm_id = cm_node->cm_id; - switch (cm_node->state) { + enum nes_cm_node_state state = cm_node->state; + cm_node->state = NES_CM_STATE_CLOSED; + switch (state) { case NES_CM_STATE_SYN_RCVD: case NES_CM_STATE_CLOSING: rem_ref_cm_node(cm_node->cm_core, cm_node); @@ -511,7 +513,6 @@ static void nes_retrans_expired(struct nes_cm_node *cm_node) case NES_CM_STATE_FIN_WAIT1: if (cm_node->cm_id) cm_id->rem_ref(cm_id); - cm_node->state = NES_CM_STATE_CLOSED; send_reset(cm_node, NULL); break; default: @@ -1439,9 +1440,6 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, break; case NES_CM_STATE_MPAREQ_RCVD: passive_state = atomic_add_return(1, &cm_node->passive_state); - if (passive_state == NES_SEND_RESET_EVENT) - create_event(cm_node, NES_CM_EVENT_RESET); - cm_node->state = NES_CM_STATE_CLOSED; dev_kfree_skb_any(skb); break; case NES_CM_STATE_ESTABLISHED: @@ -1456,6 +1454,7 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, case NES_CM_STATE_CLOSED: drop_packet(skb); break; + case NES_CM_STATE_FIN_WAIT2: case NES_CM_STATE_FIN_WAIT1: case NES_CM_STATE_LAST_ACK: cm_node->cm_id->rem_ref(cm_node->cm_id); @@ -2777,6 +2776,12 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) return -EINVAL; } + passive_state = atomic_add_return(1, &cm_node->passive_state); + if (passive_state == NES_SEND_RESET_EVENT) { + rem_ref_cm_node(cm_node->cm_core, cm_node); + return -ECONNRESET; + } + /* associate the node with the QP */ nesqp->cm_node = (void *)cm_node; cm_node->nesqp = nesqp; @@ -2979,9 +2984,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " "ret=%d\n", __func__, __LINE__, ret); - passive_state = atomic_add_return(1, &cm_node->passive_state); - if (passive_state == NES_SEND_RESET_EVENT) - create_event(cm_node, NES_CM_EVENT_RESET); return 0; } diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index f8233c8..1980a46 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -3468,6 +3468,19 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, return; /* Ignore it, wait for close complete */ if (atomic_inc_return(&nesqp->close_timer_started) == 1) { + if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) && + (nesqp->ibqp_state == IB_QPS_RTS) && + ((nesadapter->eeprom_version >> 16) != NES_A0)) { + spin_lock_irqsave(&nesqp->lock, flags); + nesqp->hw_iwarp_state = iwarp_state; + nesqp->hw_tcp_state = tcp_state; + nesqp->last_aeq = async_event_id; + next_iwarp_state = 
NES_CQP_QP_IWARP_STATE_CLOSING; nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING; spin_unlock_irqrestore(&nesqp->lock, flags); nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0); nes_cm_disconn(nesqp); } nesqp->cm_id->add_ref(nesqp->cm_id); schedule_nes_timer(nesqp->cm_node, (struct sk_buff *)nesqp, NES_TIMER_TYPE_CLOSE, 1, 0); @@ -3477,7 +3490,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), async_event_id, nesqp->last_aeq, tcp_state); } - break; case NES_AEQE_AEID_LLP_CLOSE_COMPLETE: if (nesqp->term_flags) { diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index aa9183d..1204c34 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -45,6 +45,7 @@ #define NES_PHY_TYPE_KR 9 #define NES_MULTICAST_PF_MAX 8 +#define NES_A0 3 enum pci_regs { NES_INT_STAT = 0x0000, diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 6dfdd49..10560c7 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -1446,14 +1446,14 @@ static int nes_netdev_set_pauseparam(struct net_device *netdev, NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200)); u32temp |= NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE; nes_write_indexed(nesdev, - NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE + (nesdev->mac_index*0x200), u32temp); + NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200), u32temp); nesdev->disable_tx_flow_control = 0; } else if ((et_pauseparam->tx_pause == 0) && (nesdev->disable_tx_flow_control == 0)) { u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200)); u32temp &= ~NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE; nes_write_indexed(nesdev, - NES_IDX_MAC_TX_CONFIG_ENABLE_PAUSE + (nesdev->mac_index*0x200), u32temp); + NES_IDX_MAC_TX_CONFIG + (nesdev->mac_index*0x200), u32temp); nesdev->disable_tx_flow_control = 1; } if ((et_pauseparam->rx_pause == 1) && (nesdev->disable_rx_flow_control == 1)) { diff --git a/drivers/input/input.c b/drivers/input/input.c index a9b025f..ab69820 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -1599,11 +1599,14 @@ EXPORT_SYMBOL(input_free_device); * @dev: input device supporting MT events and finger tracking * @num_slots: number of slots used by the device * - * This function allocates all necessary memory for MT slot handling - * in the input device, and adds ABS_MT_SLOT to the device capabilities. + * This function allocates all necessary memory for MT slot handling in the + * input device, and adds ABS_MT_SLOT to the device capabilities. All slots + * are initially marked as unused by setting ABS_MT_TRACKING_ID to -1. 
*/ int input_mt_create_slots(struct input_dev *dev, unsigned int num_slots) { + int i; + if (!num_slots) return 0; @@ -1614,6 +1617,10 @@ int input_mt_create_slots(struct input_dev *dev, unsigned int num_slots) dev->mtsize = num_slots; input_set_abs_params(dev, ABS_MT_SLOT, 0, num_slots - 1, 0, 0); + /* Mark slots as 'unused' */ + for (i = 0; i < num_slots; i++) + dev->mt[i].abs[ABS_MT_TRACKING_ID - ABS_MT_FIRST] = -1; + return 0; } EXPORT_SYMBOL(input_mt_create_slots); diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c index ea67c49..b952317 100644 --- a/drivers/input/mouse/bcm5974.c +++ b/drivers/input/mouse/bcm5974.c @@ -337,10 +337,14 @@ static void report_finger_data(struct input_dev *input, const struct bcm5974_config *cfg, const struct tp_finger *f) { - input_report_abs(input, ABS_MT_TOUCH_MAJOR, raw2int(f->force_major)); - input_report_abs(input, ABS_MT_TOUCH_MINOR, raw2int(f->force_minor)); - input_report_abs(input, ABS_MT_WIDTH_MAJOR, raw2int(f->size_major)); - input_report_abs(input, ABS_MT_WIDTH_MINOR, raw2int(f->size_minor)); + input_report_abs(input, ABS_MT_TOUCH_MAJOR, + raw2int(f->force_major) << 1); + input_report_abs(input, ABS_MT_TOUCH_MINOR, + raw2int(f->force_minor) << 1); + input_report_abs(input, ABS_MT_WIDTH_MAJOR, + raw2int(f->size_major) << 1); + input_report_abs(input, ABS_MT_WIDTH_MINOR, + raw2int(f->size_minor) << 1); input_report_abs(input, ABS_MT_ORIENTATION, MAX_FINGER_ORIENTATION - raw2int(f->orientation)); input_report_abs(input, ABS_MT_POSITION_X, raw2int(f->abs_x)); diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index 46e4ba0..f585131 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -1485,8 +1485,8 @@ static int __init i8042_init(void) static void __exit i8042_exit(void) { - platform_driver_unregister(&i8042_driver); platform_device_unregister(i8042_platform_device); + platform_driver_unregister(&i8042_driver); i8042_platform_exit(); panic_blink = NULL; diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c index 40d77ba..6e29bad 100644 --- a/drivers/input/tablet/wacom_wac.c +++ b/drivers/input/tablet/wacom_wac.c @@ -243,10 +243,10 @@ static int wacom_graphire_irq(struct wacom_wac *wacom) if (features->type == WACOM_G4 || features->type == WACOM_MO) { input_report_abs(input, ABS_DISTANCE, data[6] & 0x3f); - rw = (signed)(data[7] & 0x04) - (data[7] & 0x03); + rw = (data[7] & 0x04) - (data[7] & 0x03); } else { input_report_abs(input, ABS_DISTANCE, data[7] & 0x3f); - rw = -(signed)data[6]; + rw = -(signed char)data[6]; } input_report_rel(input, REL_WHEEL, rw); } diff --git a/drivers/md/.gitignore b/drivers/md/.gitignore deleted file mode 100644 index a7afec6..0000000 --- a/drivers/md/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -mktables -raid6altivec*.c -raid6int*.c -raid6tables.c diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 1ba1e12..ed4900a 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1542,8 +1542,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) atomic_read(&bitmap->mddev->recovery_active) == 0); bitmap->mddev->curr_resync_completed = bitmap->mddev->curr_resync; - if (bitmap->mddev->persistent) - set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags); + set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags); sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1); s = 0; while (s < sector && s < bitmap->mddev->resync_max_sectors) { diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 368e8e9..d5b0e4c 100644 
--- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1278,7 +1278,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, struct dm_crypt_io *io; struct crypt_config *cc; - if (unlikely(bio_empty_barrier(bio))) { + if (bio->bi_rw & REQ_FLUSH) { cc = ti->private; bio->bi_bdev = cc->dev->bdev; return DM_MAPIO_REMAPPED; diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 0590c75..136d4f7 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -31,7 +31,6 @@ struct dm_io_client { */ struct io { unsigned long error_bits; - unsigned long eopnotsupp_bits; atomic_t count; struct task_struct *sleeper; struct dm_io_client *client; @@ -130,11 +129,8 @@ static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io, *---------------------------------------------------------------*/ static void dec_count(struct io *io, unsigned int region, int error) { - if (error) { + if (error) set_bit(region, &io->error_bits); - if (error == -EOPNOTSUPP) - set_bit(region, &io->eopnotsupp_bits); - } if (atomic_dec_and_test(&io->count)) { if (io->sleeper) @@ -310,8 +306,8 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, sector_t remaining = where->count; /* - * where->count may be zero if rw holds a write barrier and we - * need to send a zero-sized barrier. + * where->count may be zero if rw holds a flush and we need to + * send a zero-sized flush. */ do { /* @@ -364,7 +360,7 @@ static void dispatch_io(int rw, unsigned int num_regions, */ for (i = 0; i < num_regions; i++) { *dp = old_pages; - if (where[i].count || (rw & REQ_HARDBARRIER)) + if (where[i].count || (rw & REQ_FLUSH)) do_region(rw, i, where + i, dp, io); } @@ -393,9 +389,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, return -EIO; } -retry: io->error_bits = 0; - io->eopnotsupp_bits = 0; atomic_set(&io->count, 1); /* see dispatch_io() */ io->sleeper = current; io->client = client; @@ -412,11 +406,6 @@ retry: } set_current_state(TASK_RUNNING); - if (io->eopnotsupp_bits && (rw & REQ_HARDBARRIER)) { - rw &= ~REQ_HARDBARRIER; - goto retry; - } - if (error_bits) *error_bits = io->error_bits; @@ -437,7 +426,6 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions, io = mempool_alloc(client->pool, GFP_NOIO); io->error_bits = 0; - io->eopnotsupp_bits = 0; atomic_set(&io->count, 1); /* see dispatch_io() */ io->sleeper = NULL; io->client = client; diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 5a08be0..33420e6 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -300,7 +300,7 @@ static int flush_header(struct log_c *lc) .count = 0, }; - lc->io_req.bi_rw = WRITE_BARRIER; + lc->io_req.bi_rw = WRITE_FLUSH; return dm_io(&lc->io_req, 1, &null_location, NULL); } diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 7c081bc..19a59b0 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -259,7 +259,7 @@ static int mirror_flush(struct dm_target *ti) struct dm_io_region io[ms->nr_mirrors]; struct mirror *m; struct dm_io_request io_req = { - .bi_rw = WRITE_BARRIER, + .bi_rw = WRITE_FLUSH, .mem.type = DM_IO_KMEM, .mem.ptr.bvec = NULL, .client = ms->io_client, @@ -629,7 +629,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio) struct dm_io_region io[ms->nr_mirrors], *dest = io; struct mirror *m; struct dm_io_request io_req = { - .bi_rw = WRITE | (bio->bi_rw & WRITE_BARRIER), + .bi_rw = WRITE | (bio->bi_rw & WRITE_FLUSH_FUA), .mem.type = DM_IO_BVEC, .mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx, 
.notify.fn = write_callback, @@ -670,7 +670,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) bio_list_init(&requeue); while ((bio = bio_list_pop(writes))) { - if (unlikely(bio_empty_barrier(bio))) { + if (bio->bi_rw & REQ_FLUSH) { bio_list_add(&sync, bio); continue; } @@ -1203,7 +1203,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, * We need to dec pending if this was a write. */ if (rw == WRITE) { - if (likely(!bio_empty_barrier(bio))) + if (!(bio->bi_rw & REQ_FLUSH)) dm_rh_dec(ms->rh, map_context->ll); return error; } diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c index bd5c58b..dad011a 100644 --- a/drivers/md/dm-region-hash.c +++ b/drivers/md/dm-region-hash.c @@ -81,9 +81,9 @@ struct dm_region_hash { struct list_head failed_recovered_regions; /* - * If there was a barrier failure no regions can be marked clean. + * If there was a flush failure no regions can be marked clean. */ - int barrier_failure; + int flush_failure; void *context; sector_t target_begin; @@ -217,7 +217,7 @@ struct dm_region_hash *dm_region_hash_create( INIT_LIST_HEAD(&rh->quiesced_regions); INIT_LIST_HEAD(&rh->recovered_regions); INIT_LIST_HEAD(&rh->failed_recovered_regions); - rh->barrier_failure = 0; + rh->flush_failure = 0; rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS, sizeof(struct dm_region)); @@ -399,8 +399,8 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio) region_t region = dm_rh_bio_to_region(rh, bio); int recovering = 0; - if (bio_empty_barrier(bio)) { - rh->barrier_failure = 1; + if (bio->bi_rw & REQ_FLUSH) { + rh->flush_failure = 1; return; } @@ -524,7 +524,7 @@ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios) struct bio *bio; for (bio = bios->head; bio; bio = bio->bi_next) { - if (bio_empty_barrier(bio)) + if (bio->bi_rw & REQ_FLUSH) continue; rh_inc(rh, dm_rh_bio_to_region(rh, bio)); } @@ -555,9 +555,9 @@ void dm_rh_dec(struct dm_region_hash *rh, region_t region) */ /* do nothing for DM_RH_NOSYNC */ - if (unlikely(rh->barrier_failure)) { + if (unlikely(rh->flush_failure)) { /* - * If a write barrier failed some time ago, we + * If a write flush failed some time ago, we * don't know whether or not this write made it * to the disk, so we must resync the device. */ diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index cc2bdb8..0b61792 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -687,7 +687,7 @@ static void persistent_commit_exception(struct dm_exception_store *store, /* * Commit exceptions to disk. */ - if (ps->valid && area_io(ps, WRITE_BARRIER)) + if (ps->valid && area_io(ps, WRITE_FLUSH_FUA)) ps->valid = 0; /* diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 5974d30..53cf79d 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -706,8 +706,6 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new) return 0; } -#define min_not_zero(l, r) (((l) == 0) ? (r) : (((r) == 0) ? (l) : min(l, r))) - /* * Return a minimum chunk size of all snapshots that have the specified origin. * Return zero if the origin has no snapshots. 
@@ -1587,7 +1585,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, chunk_t chunk; struct dm_snap_pending_exception *pe = NULL; - if (unlikely(bio_empty_barrier(bio))) { + if (bio->bi_rw & REQ_FLUSH) { bio->bi_bdev = s->cow->bdev; return DM_MAPIO_REMAPPED; } @@ -1691,7 +1689,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, int r = DM_MAPIO_REMAPPED; chunk_t chunk; - if (unlikely(bio_empty_barrier(bio))) { + if (bio->bi_rw & REQ_FLUSH) { if (!map_context->target_request_nr) bio->bi_bdev = s->origin->bdev; else @@ -2135,7 +2133,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio, struct dm_dev *dev = ti->private; bio->bi_bdev = dev->bdev; - if (unlikely(bio_empty_barrier(bio))) + if (bio->bi_rw & REQ_FLUSH) return DM_MAPIO_REMAPPED; /* Only tell snapshots if this is a write */ diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index c297f6d..f0371b4 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -271,7 +271,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio, uint32_t stripe; unsigned target_request_nr; - if (unlikely(bio_empty_barrier(bio))) { + if (bio->bi_rw & REQ_FLUSH) { target_request_nr = map_context->target_request_nr; BUG_ON(target_request_nr >= sc->stripes); bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index f9fc07d..90267f8 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -486,11 +486,6 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti, return 0; } -/* - * Returns the minimum that is _not_ zero, unless both are zero. - */ -#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) - int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ac384b2..f934e98 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -110,7 +110,6 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo); #define DMF_FREEING 3 #define DMF_DELETING 4 #define DMF_NOFLUSH_SUSPENDING 5 -#define DMF_QUEUE_IO_TO_THREAD 6 /* * Work processed by per-device workqueue. @@ -144,24 +143,9 @@ struct mapped_device { spinlock_t deferred_lock; /* - * An error from the barrier request currently being processed. - */ - int barrier_error; - - /* - * Protect barrier_error from concurrent endio processing - * in request-based dm. - */ - spinlock_t barrier_error_lock; - - /* - * Processing queue (flush/barriers) + * Processing queue (flush) */ struct workqueue_struct *wq; - struct work_struct barrier_work; - - /* A pointer to the currently processing pre/post flush request */ - struct request *flush_request; /* * The current mapping. @@ -200,8 +184,8 @@ struct mapped_device { /* sysfs handle */ struct kobject kobj; - /* zero-length barrier that will be cloned and submitted to targets */ - struct bio barrier_bio; + /* zero-length flush that will be cloned and submitted to targets */ + struct bio flush_bio; }; /* @@ -512,7 +496,7 @@ static void end_io_acct(struct dm_io *io) /* * After this is decremented the bio must not be touched if it is - * a barrier. + * a flush. 
*/ dm_disk(md)->part0.in_flight[rw] = pending = atomic_dec_return(&md->pending[rw]); @@ -528,16 +512,12 @@ static void end_io_acct(struct dm_io *io) */ static void queue_io(struct mapped_device *md, struct bio *bio) { - down_write(&md->io_lock); + unsigned long flags; - spin_lock_irq(&md->deferred_lock); + spin_lock_irqsave(&md->deferred_lock, flags); bio_list_add(&md->deferred, bio); - spin_unlock_irq(&md->deferred_lock); - - if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) - queue_work(md->wq, &md->work); - - up_write(&md->io_lock); + spin_unlock_irqrestore(&md->deferred_lock, flags); + queue_work(md->wq, &md->work); } /* @@ -625,11 +605,9 @@ static void dec_pending(struct dm_io *io, int error) * Target requested pushing back the I/O. */ spin_lock_irqsave(&md->deferred_lock, flags); - if (__noflush_suspending(md)) { - if (!(io->bio->bi_rw & REQ_HARDBARRIER)) - bio_list_add_head(&md->deferred, - io->bio); - } else + if (__noflush_suspending(md)) + bio_list_add_head(&md->deferred, io->bio); + else /* noflush suspend was interrupted. */ io->error = -EIO; spin_unlock_irqrestore(&md->deferred_lock, flags); @@ -637,32 +615,23 @@ static void dec_pending(struct dm_io *io, int error) io_error = io->error; bio = io->bio; + end_io_acct(io); + free_io(md, io); + + if (io_error == DM_ENDIO_REQUEUE) + return; - if (bio->bi_rw & REQ_HARDBARRIER) { + if ((bio->bi_rw & REQ_FLUSH) && bio->bi_size) { /* - * There can be just one barrier request so we use - * a per-device variable for error reporting. - * Note that you can't touch the bio after end_io_acct - * - * We ignore -EOPNOTSUPP for empty flush reported by - * underlying devices. We assume that if the device - * doesn't support empty barriers, it doesn't need - * cache flushing commands. + * Preflush done for flush with data, reissue + * without REQ_FLUSH. */ - if (!md->barrier_error && - !(bio_empty_barrier(bio) && io_error == -EOPNOTSUPP)) - md->barrier_error = io_error; - end_io_acct(io); - free_io(md, io); + bio->bi_rw &= ~REQ_FLUSH; + queue_io(md, bio); } else { - end_io_acct(io); - free_io(md, io); - - if (io_error != DM_ENDIO_REQUEUE) { - trace_block_bio_complete(md->queue, bio); - - bio_endio(bio, io_error); - } + /* done with normal IO or empty flush */ + trace_block_bio_complete(md->queue, bio); + bio_endio(bio, io_error); } } } @@ -755,23 +724,6 @@ static void end_clone_bio(struct bio *clone, int error) blk_update_request(tio->orig, 0, nr_bytes); } -static void store_barrier_error(struct mapped_device *md, int error) -{ - unsigned long flags; - - spin_lock_irqsave(&md->barrier_error_lock, flags); - /* - * Basically, the first error is taken, but: - * -EOPNOTSUPP supersedes any I/O error. - * Requeue request supersedes any I/O error but -EOPNOTSUPP. - */ - if (!md->barrier_error || error == -EOPNOTSUPP || - (md->barrier_error != -EOPNOTSUPP && - error == DM_ENDIO_REQUEUE)) - md->barrier_error = error; - spin_unlock_irqrestore(&md->barrier_error_lock, flags); -} - /* * Don't touch any member of the md after calling this function because * the md may be freed in dm_put() at the end of this function. 
@@ -809,13 +761,11 @@ static void free_rq_clone(struct request *clone) static void dm_end_request(struct request *clone, int error) { int rw = rq_data_dir(clone); - int run_queue = 1; - bool is_barrier = clone->cmd_flags & REQ_HARDBARRIER; struct dm_rq_target_io *tio = clone->end_io_data; struct mapped_device *md = tio->md; struct request *rq = tio->orig; - if (rq->cmd_type == REQ_TYPE_BLOCK_PC && !is_barrier) { + if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { rq->errors = clone->errors; rq->resid_len = clone->resid_len; @@ -829,15 +779,8 @@ static void dm_end_request(struct request *clone, int error) } free_rq_clone(clone); - - if (unlikely(is_barrier)) { - if (unlikely(error)) - store_barrier_error(md, error); - run_queue = 0; - } else - blk_end_request_all(rq, error); - - rq_completed(md, rw, run_queue); + blk_end_request_all(rq, error); + rq_completed(md, rw, true); } static void dm_unprep_request(struct request *rq) @@ -862,16 +805,6 @@ void dm_requeue_unmapped_request(struct request *clone) struct request_queue *q = rq->q; unsigned long flags; - if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) { - /* - * Barrier clones share an original request. - * Leave it to dm_end_request(), which handles this special - * case. - */ - dm_end_request(clone, DM_ENDIO_REQUEUE); - return; - } - dm_unprep_request(rq); spin_lock_irqsave(q->queue_lock, flags); @@ -961,19 +894,6 @@ static void dm_complete_request(struct request *clone, int error) struct dm_rq_target_io *tio = clone->end_io_data; struct request *rq = tio->orig; - if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) { - /* - * Barrier clones share an original request. So can't use - * softirq_done with the original. - * Pass the clone to dm_done() directly in this special case. - * It is safe (even if clone->q->queue_lock is held here) - * because there is no I/O dispatching during the completion - * of barrier clone. - */ - dm_done(clone, error, true); - return; - } - tio->error = error; rq->completion_data = clone; blk_complete_request(rq); @@ -990,17 +910,6 @@ void dm_kill_unmapped_request(struct request *clone, int error) struct dm_rq_target_io *tio = clone->end_io_data; struct request *rq = tio->orig; - if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) { - /* - * Barrier clones share an original request. - * Leave it to dm_end_request(), which handles this special - * case. - */ - BUG_ON(error > 0); - dm_end_request(clone, error); - return; - } - rq->cmd_flags |= REQ_FAILED; dm_complete_request(clone, error); } @@ -1119,7 +1028,7 @@ static void dm_bio_destructor(struct bio *bio) } /* - * Creates a little bio that is just does part of a bvec. + * Creates a little bio that just does part of a bvec. 
*/ static struct bio *split_bvec(struct bio *bio, sector_t sector, unsigned short idx, unsigned int offset, @@ -1134,7 +1043,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector, clone->bi_sector = sector; clone->bi_bdev = bio->bi_bdev; - clone->bi_rw = bio->bi_rw & ~REQ_HARDBARRIER; + clone->bi_rw = bio->bi_rw; clone->bi_vcnt = 1; clone->bi_size = to_bytes(len); clone->bi_io_vec->bv_offset = offset; @@ -1161,7 +1070,6 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); __bio_clone(clone, bio); - clone->bi_rw &= ~REQ_HARDBARRIER; clone->bi_destructor = dm_bio_destructor; clone->bi_sector = sector; clone->bi_idx = idx; @@ -1225,16 +1133,15 @@ static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti, __issue_target_request(ci, ti, request_nr, len); } -static int __clone_and_map_empty_barrier(struct clone_info *ci) +static int __clone_and_map_empty_flush(struct clone_info *ci) { unsigned target_nr = 0; struct dm_target *ti; + BUG_ON(bio_has_data(ci->bio)); while ((ti = dm_table_get_target(ci->map, target_nr++))) __issue_target_requests(ci, ti, ti->num_flush_requests, 0); - ci->sector_count = 0; - return 0; } @@ -1289,9 +1196,6 @@ static int __clone_and_map(struct clone_info *ci) sector_t len = 0, max; struct dm_target_io *tio; - if (unlikely(bio_empty_barrier(bio))) - return __clone_and_map_empty_barrier(ci); - if (unlikely(bio->bi_rw & REQ_DISCARD)) return __clone_and_map_discard(ci); @@ -1383,16 +1287,11 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) ci.map = dm_get_live_table(md); if (unlikely(!ci.map)) { - if (!(bio->bi_rw & REQ_HARDBARRIER)) - bio_io_error(bio); - else - if (!md->barrier_error) - md->barrier_error = -EIO; + bio_io_error(bio); return; } ci.md = md; - ci.bio = bio; ci.io = alloc_io(md); ci.io->error = 0; atomic_set(&ci.io->io_count, 1); @@ -1400,14 +1299,20 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) ci.io->md = md; spin_lock_init(&ci.io->endio_lock); ci.sector = bio->bi_sector; - ci.sector_count = bio_sectors(bio); - if (unlikely(bio_empty_barrier(bio))) - ci.sector_count = 1; ci.idx = bio->bi_idx; start_io_acct(ci.io); - while (ci.sector_count && !error) - error = __clone_and_map(&ci); + if (bio->bi_rw & REQ_FLUSH) { + ci.bio = &ci.md->flush_bio; + ci.sector_count = 0; + error = __clone_and_map_empty_flush(&ci); + /* dec_pending submits any data associated with flush */ + } else { + ci.bio = bio; + ci.sector_count = bio_sectors(bio); + while (ci.sector_count && !error) + error = __clone_and_map(&ci); + } /* drop the extra reference count */ dec_pending(ci.io, error); @@ -1491,22 +1396,14 @@ static int _dm_request(struct request_queue *q, struct bio *bio) part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); part_stat_unlock(); - /* - * If we're suspended or the thread is processing barriers - * we have to queue this io for later. 
- */ - if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) || - unlikely(bio->bi_rw & REQ_HARDBARRIER)) { + /* if we're suspended, we have to queue this io for later */ + if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { up_read(&md->io_lock); - if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) && - bio_rw(bio) == READA) { + if (bio_rw(bio) != READA) + queue_io(md, bio); + else bio_io_error(bio); - return 0; - } - - queue_io(md, bio); - return 0; } @@ -1537,14 +1434,6 @@ static int dm_request(struct request_queue *q, struct bio *bio) return _dm_request(q, bio); } -static bool dm_rq_is_flush_request(struct request *rq) -{ - if (rq->cmd_flags & REQ_FLUSH) - return true; - else - return false; -} - void dm_dispatch_request(struct request *rq) { int r; @@ -1592,22 +1481,15 @@ static int setup_clone(struct request *clone, struct request *rq, { int r; - if (dm_rq_is_flush_request(rq)) { - blk_rq_init(NULL, clone); - clone->cmd_type = REQ_TYPE_FS; - clone->cmd_flags |= (REQ_HARDBARRIER | WRITE); - } else { - r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC, - dm_rq_bio_constructor, tio); - if (r) - return r; - - clone->cmd = rq->cmd; - clone->cmd_len = rq->cmd_len; - clone->sense = rq->sense; - clone->buffer = rq->buffer; - } + r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC, + dm_rq_bio_constructor, tio); + if (r) + return r; + clone->cmd = rq->cmd; + clone->cmd_len = rq->cmd_len; + clone->sense = rq->sense; + clone->buffer = rq->buffer; clone->end_io = end_clone_request; clone->end_io_data = tio; @@ -1648,9 +1530,6 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq) struct mapped_device *md = q->queuedata; struct request *clone; - if (unlikely(dm_rq_is_flush_request(rq))) - return BLKPREP_OK; - if (unlikely(rq->special)) { DMWARN("Already has something in rq->special."); return BLKPREP_KILL; @@ -1727,6 +1606,7 @@ static void dm_request_fn(struct request_queue *q) struct dm_table *map = dm_get_live_table(md); struct dm_target *ti; struct request *rq, *clone; + sector_t pos; /* * For suspend, check blk_queue_stopped() and increment @@ -1739,15 +1619,14 @@ static void dm_request_fn(struct request_queue *q) if (!rq) goto plug_and_out; - if (unlikely(dm_rq_is_flush_request(rq))) { - BUG_ON(md->flush_request); - md->flush_request = rq; - blk_start_request(rq); - queue_work(md->wq, &md->barrier_work); - goto out; - } + /* always use block 0 to find the target for flushes for now */ + pos = 0; + if (!(rq->cmd_flags & REQ_FLUSH)) + pos = blk_rq_pos(rq); + + ti = dm_table_find_target(map, pos); + BUG_ON(!dm_target_is_valid(ti)); - ti = dm_table_find_target(map, blk_rq_pos(rq)); if (ti->type->busy && ti->type->busy(ti)) goto plug_and_out; @@ -1918,7 +1797,6 @@ out: static const struct block_device_operations dm_blk_dops; static void dm_wq_work(struct work_struct *work); -static void dm_rq_barrier_work(struct work_struct *work); static void dm_init_md_queue(struct mapped_device *md) { @@ -1940,6 +1818,7 @@ static void dm_init_md_queue(struct mapped_device *md) blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); md->queue->unplug_fn = dm_unplug_all; blk_queue_merge_bvec(md->queue, dm_merge_bvec); + blk_queue_flush(md->queue, REQ_FLUSH | REQ_FUA); } /* @@ -1972,7 +1851,6 @@ static struct mapped_device *alloc_dev(int minor) mutex_init(&md->suspend_lock); mutex_init(&md->type_lock); spin_lock_init(&md->deferred_lock); - spin_lock_init(&md->barrier_error_lock); rwlock_init(&md->map_lock); atomic_set(&md->holders, 1); atomic_set(&md->open_count, 
0); @@ -1995,7 +1873,6 @@ static struct mapped_device *alloc_dev(int minor) atomic_set(&md->pending[1], 0); init_waitqueue_head(&md->wait); INIT_WORK(&md->work, dm_wq_work); - INIT_WORK(&md->barrier_work, dm_rq_barrier_work); init_waitqueue_head(&md->eventq); md->disk->major = _major; @@ -2015,6 +1892,10 @@ static struct mapped_device *alloc_dev(int minor) if (!md->bdev) goto bad_bdev; + bio_init(&md->flush_bio); + md->flush_bio.bi_bdev = md->bdev; + md->flush_bio.bi_rw = WRITE_FLUSH; + /* Populate the mapping, nobody knows we exist yet */ spin_lock(&_minor_lock); old_md = idr_replace(&_minor_idr, md, minor); @@ -2245,7 +2126,6 @@ static int dm_init_request_based_queue(struct mapped_device *md) blk_queue_softirq_done(md->queue, dm_softirq_done); blk_queue_prep_rq(md->queue, dm_prep_fn); blk_queue_lld_busy(md->queue, dm_lld_busy); - blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH); elv_register_queue(md->queue); @@ -2406,43 +2286,6 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible) return r; } -static void dm_flush(struct mapped_device *md) -{ - dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); - - bio_init(&md->barrier_bio); - md->barrier_bio.bi_bdev = md->bdev; - md->barrier_bio.bi_rw = WRITE_BARRIER; - __split_and_process_bio(md, &md->barrier_bio); - - dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); -} - -static void process_barrier(struct mapped_device *md, struct bio *bio) -{ - md->barrier_error = 0; - - dm_flush(md); - - if (!bio_empty_barrier(bio)) { - __split_and_process_bio(md, bio); - /* - * If the request isn't supported, don't waste time with - * the second flush. - */ - if (md->barrier_error != -EOPNOTSUPP) - dm_flush(md); - } - - if (md->barrier_error != DM_ENDIO_REQUEUE) - bio_endio(bio, md->barrier_error); - else { - spin_lock_irq(&md->deferred_lock); - bio_list_add_head(&md->deferred, bio); - spin_unlock_irq(&md->deferred_lock); - } -} - /* * Process the deferred bios */ @@ -2452,33 +2295,27 @@ static void dm_wq_work(struct work_struct *work) work); struct bio *c; - down_write(&md->io_lock); + down_read(&md->io_lock); while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { spin_lock_irq(&md->deferred_lock); c = bio_list_pop(&md->deferred); spin_unlock_irq(&md->deferred_lock); - if (!c) { - clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); + if (!c) break; - } - up_write(&md->io_lock); + up_read(&md->io_lock); if (dm_request_based(md)) generic_make_request(c); - else { - if (c->bi_rw & REQ_HARDBARRIER) - process_barrier(md, c); - else - __split_and_process_bio(md, c); - } + else + __split_and_process_bio(md, c); - down_write(&md->io_lock); + down_read(&md->io_lock); } - up_write(&md->io_lock); + up_read(&md->io_lock); } static void dm_queue_flush(struct mapped_device *md) @@ -2488,73 +2325,6 @@ static void dm_queue_flush(struct mapped_device *md) queue_work(md->wq, &md->work); } -static void dm_rq_set_target_request_nr(struct request *clone, unsigned request_nr) -{ - struct dm_rq_target_io *tio = clone->end_io_data; - - tio->info.target_request_nr = request_nr; -} - -/* Issue barrier requests to targets and wait for their completion. 
*/ -static int dm_rq_barrier(struct mapped_device *md) -{ - int i, j; - struct dm_table *map = dm_get_live_table(md); - unsigned num_targets = dm_table_get_num_targets(map); - struct dm_target *ti; - struct request *clone; - - md->barrier_error = 0; - - for (i = 0; i < num_targets; i++) { - ti = dm_table_get_target(map, i); - for (j = 0; j < ti->num_flush_requests; j++) { - clone = clone_rq(md->flush_request, md, GFP_NOIO); - dm_rq_set_target_request_nr(clone, j); - atomic_inc(&md->pending[rq_data_dir(clone)]); - map_request(ti, clone, md); - } - } - - dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); - dm_table_put(map); - - return md->barrier_error; -} - -static void dm_rq_barrier_work(struct work_struct *work) -{ - int error; - struct mapped_device *md = container_of(work, struct mapped_device, - barrier_work); - struct request_queue *q = md->queue; - struct request *rq; - unsigned long flags; - - /* - * Hold the md reference here and leave it at the last part so that - * the md can't be deleted by device opener when the barrier request - * completes. - */ - dm_get(md); - - error = dm_rq_barrier(md); - - rq = md->flush_request; - md->flush_request = NULL; - - if (error == DM_ENDIO_REQUEUE) { - spin_lock_irqsave(q->queue_lock, flags); - blk_requeue_request(q, rq); - spin_unlock_irqrestore(q->queue_lock, flags); - } else - blk_end_request_all(rq, error); - - blk_run_queue(q); - - dm_put(md); -} - /* * Swap in a new table, returning the old one for the caller to destroy. */ @@ -2677,23 +2447,17 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) * * To get all processes out of __split_and_process_bio in dm_request, * we take the write lock. To prevent any process from reentering - * __split_and_process_bio from dm_request, we set - * DMF_QUEUE_IO_TO_THREAD. - * - * To quiesce the thread (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND - * and call flush_workqueue(md->wq). flush_workqueue will wait until - * dm_wq_work exits and DMF_BLOCK_IO_FOR_SUSPEND will prevent any - * further calls to __split_and_process_bio from dm_wq_work. + * __split_and_process_bio from dm_request and quiesce the thread + * (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND and call + * flush_workqueue(md->wq). */ down_write(&md->io_lock); set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); - set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); up_write(&md->io_lock); /* - * Request-based dm uses md->wq for barrier (dm_rq_barrier_work) which - * can be kicked until md->queue is stopped. So stop md->queue before - * flushing md->wq. + * Stop md->queue before flushing md->wq in case request-based + * dm defers requests to md->wq from md->queue. 
*/ if (dm_request_based(md)) stop_queue(md->queue); diff --git a/drivers/md/linear.c b/drivers/md/linear.c index ba19060..8a2f767 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -294,8 +294,8 @@ static int linear_make_request (mddev_t *mddev, struct bio *bio) dev_info_t *tmp_dev; sector_t start_sector; - if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) { - md_barrier_request(mddev, bio); + if (unlikely(bio->bi_rw & REQ_FLUSH)) { + md_flush_request(mddev, bio); return 0; } diff --git a/drivers/md/md.c b/drivers/md/md.c index c148b63..ed075d1 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -226,12 +226,12 @@ static int md_make_request(struct request_queue *q, struct bio *bio) return 0; } rcu_read_lock(); - if (mddev->suspended || mddev->barrier) { + if (mddev->suspended) { DEFINE_WAIT(__wait); for (;;) { prepare_to_wait(&mddev->sb_wait, &__wait, TASK_UNINTERRUPTIBLE); - if (!mddev->suspended && !mddev->barrier) + if (!mddev->suspended) break; rcu_read_unlock(); schedule(); @@ -282,40 +282,29 @@ EXPORT_SYMBOL_GPL(mddev_resume); int mddev_congested(mddev_t *mddev, int bits) { - if (mddev->barrier) - return 1; return mddev->suspended; } EXPORT_SYMBOL(mddev_congested); /* - * Generic barrier handling for md + * Generic flush handling for md */ -#define POST_REQUEST_BARRIER ((void*)1) - -static void md_end_barrier(struct bio *bio, int err) +static void md_end_flush(struct bio *bio, int err) { mdk_rdev_t *rdev = bio->bi_private; mddev_t *mddev = rdev->mddev; - if (err == -EOPNOTSUPP && mddev->barrier != POST_REQUEST_BARRIER) - set_bit(BIO_EOPNOTSUPP, &mddev->barrier->bi_flags); rdev_dec_pending(rdev, mddev); if (atomic_dec_and_test(&mddev->flush_pending)) { - if (mddev->barrier == POST_REQUEST_BARRIER) { - /* This was a post-request barrier */ - mddev->barrier = NULL; - wake_up(&mddev->sb_wait); - } else - /* The pre-request barrier has finished */ - schedule_work(&mddev->barrier_work); + /* The pre-request flush has finished */ + schedule_work(&mddev->flush_work); } bio_put(bio); } -static void submit_barriers(mddev_t *mddev) +static void submit_flushes(mddev_t *mddev) { mdk_rdev_t *rdev; @@ -332,60 +321,56 @@ static void submit_barriers(mddev_t *mddev) atomic_inc(&rdev->nr_pending); rcu_read_unlock(); bi = bio_alloc(GFP_KERNEL, 0); - bi->bi_end_io = md_end_barrier; + bi->bi_end_io = md_end_flush; bi->bi_private = rdev; bi->bi_bdev = rdev->bdev; atomic_inc(&mddev->flush_pending); - submit_bio(WRITE_BARRIER, bi); + submit_bio(WRITE_FLUSH, bi); rcu_read_lock(); rdev_dec_pending(rdev, mddev); } rcu_read_unlock(); } -static void md_submit_barrier(struct work_struct *ws) +static void md_submit_flush_data(struct work_struct *ws) { - mddev_t *mddev = container_of(ws, mddev_t, barrier_work); - struct bio *bio = mddev->barrier; + mddev_t *mddev = container_of(ws, mddev_t, flush_work); + struct bio *bio = mddev->flush_bio; atomic_set(&mddev->flush_pending, 1); - if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags)) - bio_endio(bio, -EOPNOTSUPP); - else if (bio->bi_size == 0) + if (bio->bi_size == 0) /* an empty barrier - all done */ bio_endio(bio, 0); else { - bio->bi_rw &= ~REQ_HARDBARRIER; + bio->bi_rw &= ~REQ_FLUSH; if (mddev->pers->make_request(mddev, bio)) generic_make_request(bio); - mddev->barrier = POST_REQUEST_BARRIER; - submit_barriers(mddev); } if (atomic_dec_and_test(&mddev->flush_pending)) { - mddev->barrier = NULL; + mddev->flush_bio = NULL; wake_up(&mddev->sb_wait); } } -void md_barrier_request(mddev_t *mddev, struct bio *bio) +void md_flush_request(mddev_t *mddev, struct bio *bio) { 
spin_lock_irq(&mddev->write_lock); wait_event_lock_irq(mddev->sb_wait, - !mddev->barrier, + !mddev->flush_bio, mddev->write_lock, /*nothing*/); - mddev->barrier = bio; + mddev->flush_bio = bio; spin_unlock_irq(&mddev->write_lock); atomic_set(&mddev->flush_pending, 1); - INIT_WORK(&mddev->barrier_work, md_submit_barrier); + INIT_WORK(&mddev->flush_work, md_submit_flush_data); - submit_barriers(mddev); + submit_flushes(mddev); if (atomic_dec_and_test(&mddev->flush_pending)) - schedule_work(&mddev->barrier_work); + schedule_work(&mddev->flush_work); } -EXPORT_SYMBOL(md_barrier_request); +EXPORT_SYMBOL(md_flush_request); /* Support for plugging. * This mirrors the plugging support in request_queue, but does not @@ -696,31 +681,6 @@ static void super_written(struct bio *bio, int error) bio_put(bio); } -static void super_written_barrier(struct bio *bio, int error) -{ - struct bio *bio2 = bio->bi_private; - mdk_rdev_t *rdev = bio2->bi_private; - mddev_t *mddev = rdev->mddev; - - if (!test_bit(BIO_UPTODATE, &bio->bi_flags) && - error == -EOPNOTSUPP) { - unsigned long flags; - /* barriers don't appear to be supported :-( */ - set_bit(BarriersNotsupp, &rdev->flags); - mddev->barriers_work = 0; - spin_lock_irqsave(&mddev->write_lock, flags); - bio2->bi_next = mddev->biolist; - mddev->biolist = bio2; - spin_unlock_irqrestore(&mddev->write_lock, flags); - wake_up(&mddev->sb_wait); - bio_put(bio); - } else { - bio_put(bio2); - bio->bi_private = rdev; - super_written(bio, error); - } -} - void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, sector_t sector, int size, struct page *page) { @@ -729,51 +689,28 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, * and decrement it on completion, waking up sb_wait * if zero is reached. * If an error occurred, call md_error - * - * As we might need to resubmit the request if REQ_HARDBARRIER - * causes ENOTSUPP, we allocate a spare bio... */ struct bio *bio = bio_alloc(GFP_NOIO, 1); - int rw = REQ_WRITE | REQ_SYNC | REQ_UNPLUG; bio->bi_bdev = rdev->bdev; bio->bi_sector = sector; bio_add_page(bio, page, size, 0); bio->bi_private = rdev; bio->bi_end_io = super_written; - bio->bi_rw = rw; atomic_inc(&mddev->pending_writes); - if (!test_bit(BarriersNotsupp, &rdev->flags)) { - struct bio *rbio; - rw |= REQ_HARDBARRIER; - rbio = bio_clone(bio, GFP_NOIO); - rbio->bi_private = bio; - rbio->bi_end_io = super_written_barrier; - submit_bio(rw, rbio); - } else - submit_bio(rw, bio); + submit_bio(REQ_WRITE | REQ_SYNC | REQ_UNPLUG | REQ_FLUSH | REQ_FUA, + bio); } void md_super_wait(mddev_t *mddev) { - /* wait for all superblock writes that were scheduled to complete. 
- * if any had to be retried (due to BARRIER problems), retry them - */ + /* wait for all superblock writes that were scheduled to complete */ DEFINE_WAIT(wq); for(;;) { prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE); if (atomic_read(&mddev->pending_writes)==0) break; - while (mddev->biolist) { - struct bio *bio; - spin_lock_irq(&mddev->write_lock); - bio = mddev->biolist; - mddev->biolist = bio->bi_next ; - bio->bi_next = NULL; - spin_unlock_irq(&mddev->write_lock); - submit_bio(bio->bi_rw, bio); - } schedule(); } finish_wait(&mddev->sb_wait, &wq); @@ -1070,7 +1007,6 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) clear_bit(Faulty, &rdev->flags); clear_bit(In_sync, &rdev->flags); clear_bit(WriteMostly, &rdev->flags); - clear_bit(BarriersNotsupp, &rdev->flags); if (mddev->raid_disks == 0) { mddev->major_version = 0; @@ -1485,7 +1421,6 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) clear_bit(Faulty, &rdev->flags); clear_bit(In_sync, &rdev->flags); clear_bit(WriteMostly, &rdev->flags); - clear_bit(BarriersNotsupp, &rdev->flags); if (mddev->raid_disks == 0) { mddev->major_version = 1; @@ -1643,7 +1578,9 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1; if (rdev->sb_size & bmask) rdev->sb_size = (rdev->sb_size | bmask) + 1; - } + } else + max_dev = le32_to_cpu(sb->max_dev); + for (i=0; i<max_dev;i++) sb->dev_roles[i] = cpu_to_le16(0xfffe); @@ -2167,9 +2104,9 @@ repeat: rdev->recovery_offset = mddev->curr_resync_completed; } - if (mddev->external || !mddev->persistent) { - clear_bit(MD_CHANGE_DEVS, &mddev->flags); + if (!mddev->persistent) { clear_bit(MD_CHANGE_CLEAN, &mddev->flags); + clear_bit(MD_CHANGE_DEVS, &mddev->flags); wake_up(&mddev->sb_wait); return; } @@ -2178,7 +2115,6 @@ repeat: mddev->utime = get_seconds(); - set_bit(MD_CHANGE_PENDING, &mddev->flags); if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags)) force_change = 1; if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags)) @@ -3371,7 +3307,7 @@ array_state_show(mddev_t *mddev, char *page) case 0: if (mddev->in_sync) st = clean; - else if (test_bit(MD_CHANGE_CLEAN, &mddev->flags)) + else if (test_bit(MD_CHANGE_PENDING, &mddev->flags)) st = write_pending; else if (mddev->safemode) st = active_idle; @@ -3452,9 +3388,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) mddev->in_sync = 1; if (mddev->safemode == 1) mddev->safemode = 0; - if (mddev->persistent) - set_bit(MD_CHANGE_CLEAN, - &mddev->flags); + set_bit(MD_CHANGE_CLEAN, &mddev->flags); } err = 0; } else @@ -3466,8 +3400,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) case active: if (mddev->pers) { restart_array(mddev); - if (mddev->external) - clear_bit(MD_CHANGE_CLEAN, &mddev->flags); + clear_bit(MD_CHANGE_PENDING, &mddev->flags); wake_up(&mddev->sb_wait); err = 0; } else { @@ -4506,7 +4439,6 @@ int md_run(mddev_t *mddev) /* may be over-ridden by personality */ mddev->resync_max_sectors = mddev->dev_sectors; - mddev->barriers_work = 1; mddev->ok_start_degraded = start_dirty_degraded; if (start_readonly && mddev->ro == 0) @@ -4685,7 +4617,6 @@ static void md_clean(mddev_t *mddev) mddev->recovery = 0; mddev->in_sync = 0; mddev->degraded = 0; - mddev->barriers_work = 0; mddev->safemode = 0; mddev->bitmap_info.offset = 0; mddev->bitmap_info.default_offset = 0; @@ -6572,6 +6503,7 @@ void md_write_start(mddev_t *mddev, struct bio *bi) if (mddev->in_sync) { mddev->in_sync = 0; set_bit(MD_CHANGE_CLEAN, &mddev->flags); +
set_bit(MD_CHANGE_PENDING, &mddev->flags); md_wakeup_thread(mddev->thread); did_change = 1; } @@ -6580,7 +6512,6 @@ void md_write_start(mddev_t *mddev, struct bio *bi) if (did_change) sysfs_notify_dirent_safe(mddev->sysfs_state); wait_event(mddev->sb_wait, - !test_bit(MD_CHANGE_CLEAN, &mddev->flags) && !test_bit(MD_CHANGE_PENDING, &mddev->flags)); } @@ -6616,6 +6547,7 @@ int md_allow_write(mddev_t *mddev) if (mddev->in_sync) { mddev->in_sync = 0; set_bit(MD_CHANGE_CLEAN, &mddev->flags); + set_bit(MD_CHANGE_PENDING, &mddev->flags); if (mddev->safemode_delay && mddev->safemode == 0) mddev->safemode = 1; @@ -6625,7 +6557,7 @@ } else spin_unlock_irq(&mddev->write_lock); - if (test_bit(MD_CHANGE_CLEAN, &mddev->flags)) + if (test_bit(MD_CHANGE_PENDING, &mddev->flags)) return -EAGAIN; else return 0; @@ -6823,8 +6755,7 @@ void md_do_sync(mddev_t *mddev) atomic_read(&mddev->recovery_active) == 0); mddev->curr_resync_completed = mddev->curr_resync; - if (mddev->persistent) - set_bit(MD_CHANGE_CLEAN, &mddev->flags); + set_bit(MD_CHANGE_CLEAN, &mddev->flags); sysfs_notify(&mddev->kobj, NULL, "sync_completed"); } @@ -7073,7 +7004,7 @@ void md_check_recovery(mddev_t *mddev) if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) return; if ( ! ( - (mddev->flags && !mddev->external) || + (mddev->flags & ~ (1<<MD_CHANGE_PENDING)) || test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || test_bit(MD_RECOVERY_DONE, &mddev->recovery) || (mddev->external == 0 && mddev->safemode == 1) || @@ -7103,8 +7034,7 @@ mddev->recovery_cp == MaxSector) { mddev->in_sync = 1; did_change = 1; - if (mddev->persistent) - set_bit(MD_CHANGE_CLEAN, &mddev->flags); + set_bit(MD_CHANGE_CLEAN, &mddev->flags); } if (mddev->safemode == 1) mddev->safemode = 0; diff --git a/drivers/md/md.h b/drivers/md/md.h index a953fe2..112a2c3 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -87,7 +87,6 @@ struct mdk_rdev_s #define Faulty 1 /* device is known to have a fault */ #define In_sync 2 /* device is in_sync with rest of array */ #define WriteMostly 4 /* Avoid reading if at all possible */ -#define BarriersNotsupp 5 /* REQ_HARDBARRIER is not supported */ #define AllReserved 6 /* If whole device is reserved for * one array */ #define AutoDetected 7 /* added by auto-detect */ @@ -140,7 +139,7 @@ struct mddev_s unsigned long flags; #define MD_CHANGE_DEVS 0 /* Some device status has changed */ #define MD_CHANGE_CLEAN 1 /* transition to or from 'clean' */ -#define MD_CHANGE_PENDING 2 /* superblock update in progress */ +#define MD_CHANGE_PENDING 2 /* switch from 'clean' to 'active' in progress */ int suspended; atomic_t active_io; @@ -273,13 +272,6 @@ struct mddev_s int degraded; /* whether md should consider * adding a spare */ - int barriers_work; /* initialised to true, cleared as soon - * as a barrier request to slave - * fails. Only supported - */ - struct bio *biolist; /* bios that need to be retried - * because REQ_HARDBARRIER is not supported - */ atomic_t recovery_active; /* blocks scheduled, but not written */ wait_queue_head_t recovery_wait; @@ -339,16 +331,13 @@ struct mddev_s struct attribute_group *to_remove; struct plug_handle *plug; /* if used by personality */ - /* Generic barrier handling. - * If there is a pending barrier request, all other - * writes are blocked while the devices are flushed. - * The last to finish a flush schedules a worker to - * submit the barrier request (without the barrier flag), - * then submit more flush requests. + /* Generic flush handling.
+ * The last to finish preflush schedules a worker to submit + * the rest of the request (without the REQ_FLUSH flag). */ - struct bio *barrier; + struct bio *flush_bio; atomic_t flush_pending; - struct work_struct barrier_work; + struct work_struct flush_work; struct work_struct event_work; /* used by dm to report failure event */ }; @@ -502,7 +491,7 @@ extern void md_done_sync(mddev_t *mddev, int blocks, int ok); extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev); extern int mddev_congested(mddev_t *mddev, int bits); -extern void md_barrier_request(mddev_t *mddev, struct bio *bio); +extern void md_flush_request(mddev_t *mddev, struct bio *bio); extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, sector_t sector, int size, struct page *page); extern void md_super_wait(mddev_t *mddev); diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 0307d21..6d7ddf3 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -142,8 +142,8 @@ static int multipath_make_request(mddev_t *mddev, struct bio * bio) struct multipath_bh * mp_bh; struct multipath_info *multipath; - if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) { - md_barrier_request(mddev, bio); + if (unlikely(bio->bi_rw & REQ_FLUSH)) { + md_flush_request(mddev, bio); return 0; } diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 6f7af46..a39f4c3 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -483,8 +483,8 @@ static int raid0_make_request(mddev_t *mddev, struct bio *bio) struct strip_zone *zone; mdk_rdev_t *tmp_dev; - if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) { - md_barrier_request(mddev, bio); + if (unlikely(bio->bi_rw & REQ_FLUSH)) { + md_flush_request(mddev, bio); return 0; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index ad83a4d..886a9d8 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -319,83 +319,74 @@ static void raid1_end_write_request(struct bio *bio, int error) if (r1_bio->bios[mirror] == bio) break; - if (error == -EOPNOTSUPP && test_bit(R1BIO_Barrier, &r1_bio->state)) { - set_bit(BarriersNotsupp, &conf->mirrors[mirror].rdev->flags); - set_bit(R1BIO_BarrierRetry, &r1_bio->state); - r1_bio->mddev->barriers_work = 0; - /* Don't rdev_dec_pending in this branch - keep it for the retry */ - } else { + /* + * 'one mirror IO has finished' event handler: + */ + r1_bio->bios[mirror] = NULL; + to_put = bio; + if (!uptodate) { + md_error(r1_bio->mddev, conf->mirrors[mirror].rdev); + /* an I/O failed, we can't clear the bitmap */ + set_bit(R1BIO_Degraded, &r1_bio->state); + } else /* - * this branch is our 'one mirror IO has finished' event handler: + * Set R1BIO_Uptodate in our master bio, so that we + * will return a good error code for to the higher + * levels even if IO on some other mirrored buffer + * fails. + * + * The 'master' represents the composite IO operation + * to user-side. So if something waits for IO, then it + * will wait for the 'master' bio. */ - r1_bio->bios[mirror] = NULL; - to_put = bio; - if (!uptodate) { - md_error(r1_bio->mddev, conf->mirrors[mirror].rdev); - /* an I/O failed, we can't clear the bitmap */ - set_bit(R1BIO_Degraded, &r1_bio->state); - } else - /* - * Set R1BIO_Uptodate in our master bio, so that - * we will return a good error code for to the higher - * levels even if IO on some other mirrored buffer fails. - * - * The 'master' represents the composite IO operation to - * user-side. So if something waits for IO, then it will - * wait for the 'master' bio. 
- */ - set_bit(R1BIO_Uptodate, &r1_bio->state); - - update_head_pos(mirror, r1_bio); - - if (behind) { - if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags)) - atomic_dec(&r1_bio->behind_remaining); - - /* In behind mode, we ACK the master bio once the I/O has safely - * reached all non-writemostly disks. Setting the Returned bit - * ensures that this gets done only once -- we don't ever want to - * return -EIO here, instead we'll wait */ - - if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) && - test_bit(R1BIO_Uptodate, &r1_bio->state)) { - /* Maybe we can return now */ - if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) { - struct bio *mbio = r1_bio->master_bio; - PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n", - (unsigned long long) mbio->bi_sector, - (unsigned long long) mbio->bi_sector + - (mbio->bi_size >> 9) - 1); - bio_endio(mbio, 0); - } + set_bit(R1BIO_Uptodate, &r1_bio->state); + + update_head_pos(mirror, r1_bio); + + if (behind) { + if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags)) + atomic_dec(&r1_bio->behind_remaining); + + /* + * In behind mode, we ACK the master bio once the I/O + * has safely reached all non-writemostly + * disks. Setting the Returned bit ensures that this + * gets done only once -- we don't ever want to return + * -EIO here, instead we'll wait + */ + if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) && + test_bit(R1BIO_Uptodate, &r1_bio->state)) { + /* Maybe we can return now */ + if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) { + struct bio *mbio = r1_bio->master_bio; + PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n", + (unsigned long long) mbio->bi_sector, + (unsigned long long) mbio->bi_sector + + (mbio->bi_size >> 9) - 1); + bio_endio(mbio, 0); } } - rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); } + rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); + /* - * * Let's see if all mirrored write operations have finished * already. */ if (atomic_dec_and_test(&r1_bio->remaining)) { - if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) - reschedule_retry(r1_bio); - else { - /* it really is the end of this request */ - if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { - /* free extra copy of the data pages */ - int i = bio->bi_vcnt; - while (i--) - safe_put_page(bio->bi_io_vec[i].bv_page); - } - /* clear the bitmap if all writes complete successfully */ - bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, - r1_bio->sectors, - !test_bit(R1BIO_Degraded, &r1_bio->state), - behind); - md_write_end(r1_bio->mddev); - raid_end_bio_io(r1_bio); + if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { + /* free extra copy of the data pages */ + int i = bio->bi_vcnt; + while (i--) + safe_put_page(bio->bi_io_vec[i].bv_page); } + /* clear the bitmap if all writes complete successfully */ + bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, + r1_bio->sectors, + !test_bit(R1BIO_Degraded, &r1_bio->state), + behind); + md_write_end(r1_bio->mddev); + raid_end_bio_io(r1_bio); } if (to_put) @@ -788,16 +779,13 @@ static int make_request(mddev_t *mddev, struct bio * bio) struct page **behind_pages = NULL; const int rw = bio_data_dir(bio); const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); - unsigned long do_barriers; + const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); mdk_rdev_t *blocked_rdev; /* * Register the new request and wait if the reconstruction * thread has put up a bar for new requests. 
* Continue immediately if no resync is active currently. - * We test barriers_work *after* md_write_start as md_write_start - * may cause the first superblock write, and that will check out - * if barriers work. */ md_write_start(mddev, bio); /* wait on superblock update early */ @@ -821,13 +809,6 @@ static int make_request(mddev_t *mddev, struct bio * bio) } finish_wait(&conf->wait_barrier, &w); } - if (unlikely(!mddev->barriers_work && - (bio->bi_rw & REQ_HARDBARRIER))) { - if (rw == WRITE) - md_write_end(mddev); - bio_endio(bio, -EOPNOTSUPP); - return 0; - } wait_barrier(conf); @@ -959,10 +940,6 @@ static int make_request(mddev_t *mddev, struct bio * bio) atomic_set(&r1_bio->remaining, 0); atomic_set(&r1_bio->behind_remaining, 0); - do_barriers = bio->bi_rw & REQ_HARDBARRIER; - if (do_barriers) - set_bit(R1BIO_Barrier, &r1_bio->state); - bio_list_init(&bl); for (i = 0; i < disks; i++) { struct bio *mbio; @@ -975,7 +952,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; mbio->bi_bdev = conf->mirrors[i].rdev->bdev; mbio->bi_end_io = raid1_end_write_request; - mbio->bi_rw = WRITE | do_barriers | do_sync; + mbio->bi_rw = WRITE | do_flush_fua | do_sync; mbio->bi_private = r1_bio; if (behind_pages) { @@ -1634,41 +1611,6 @@ static void raid1d(mddev_t *mddev) if (test_bit(R1BIO_IsSync, &r1_bio->state)) { sync_request_write(mddev, r1_bio); unplug = 1; - } else if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) { - /* some requests in the r1bio were REQ_HARDBARRIER - * requests which failed with -EOPNOTSUPP. Hohumm.. - * Better resubmit without the barrier. - * We know which devices to resubmit for, because - * all others have had their bios[] entry cleared. - * We already have a nr_pending reference on these rdevs. - */ - int i; - const unsigned long do_sync = (r1_bio->master_bio->bi_rw & REQ_SYNC); - clear_bit(R1BIO_BarrierRetry, &r1_bio->state); - clear_bit(R1BIO_Barrier, &r1_bio->state); - for (i=0; i < conf->raid_disks; i++) - if (r1_bio->bios[i]) - atomic_inc(&r1_bio->remaining); - for (i=0; i < conf->raid_disks; i++) - if (r1_bio->bios[i]) { - struct bio_vec *bvec; - int j; - - bio = bio_clone(r1_bio->master_bio, GFP_NOIO); - /* copy pages from the failed bio, as - * this might be a write-behind device */ - __bio_for_each_segment(bvec, bio, j, 0) - bvec->bv_page = bio_iovec_idx(r1_bio->bios[i], j)->bv_page; - bio_put(r1_bio->bios[i]); - bio->bi_sector = r1_bio->sector + - conf->mirrors[i].rdev->data_offset; - bio->bi_bdev = conf->mirrors[i].rdev->bdev; - bio->bi_end_io = raid1_end_write_request; - bio->bi_rw = WRITE | do_sync; - bio->bi_private = r1_bio; - r1_bio->bios[i] = bio; - generic_make_request(bio); - } } else { int disk; diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index 5f2d443..adf8cfd 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h @@ -117,8 +117,6 @@ struct r1bio_s { #define R1BIO_IsSync 1 #define R1BIO_Degraded 2 #define R1BIO_BehindIO 3 -#define R1BIO_Barrier 4 -#define R1BIO_BarrierRetry 5 /* For write-behind requests, we call bi_end_io when * the last non-write-behind device completes, providing * any write was successful. 
Otherwise we call when diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 8471838..f0d082f 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -800,12 +800,13 @@ static int make_request(mddev_t *mddev, struct bio * bio) int chunk_sects = conf->chunk_mask + 1; const int rw = bio_data_dir(bio); const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); + const unsigned long do_fua = (bio->bi_rw & REQ_FUA); struct bio_list bl; unsigned long flags; mdk_rdev_t *blocked_rdev; - if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) { - md_barrier_request(mddev, bio); + if (unlikely(bio->bi_rw & REQ_FLUSH)) { + md_flush_request(mddev, bio); return 0; } @@ -965,7 +966,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) conf->mirrors[d].rdev->data_offset; mbio->bi_bdev = conf->mirrors[d].rdev->bdev; mbio->bi_end_io = raid10_end_write_request; - mbio->bi_rw = WRITE | do_sync; + mbio->bi_rw = WRITE | do_sync | do_fua; mbio->bi_private = r10_bio; atomic_inc(&r10_bio->remaining); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 69b0a16..31140d1 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -506,9 +506,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) int rw; struct bio *bi; mdk_rdev_t *rdev; - if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) - rw = WRITE; - else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) + if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) { + if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags)) + rw = WRITE_FUA; + else + rw = WRITE; + } else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) rw = READ; else continue; @@ -1031,6 +1034,8 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) { + if (wbi->bi_rw & REQ_FUA) + set_bit(R5_WantFUA, &dev->flags); tx = async_copy_data(1, wbi, dev->page, dev->sector, tx); wbi = r5_next_bio(wbi, dev->sector); @@ -1048,15 +1053,22 @@ static void ops_complete_reconstruct(void *stripe_head_ref) int pd_idx = sh->pd_idx; int qd_idx = sh->qd_idx; int i; + bool fua = false; pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector); + for (i = disks; i--; ) + fua |= test_bit(R5_WantFUA, &sh->dev[i].flags); + for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; - if (dev->written || i == pd_idx || i == qd_idx) + if (dev->written || i == pd_idx || i == qd_idx) { set_bit(R5_UPTODATE, &dev->flags); + if (fua) + set_bit(R5_WantFUA, &dev->flags); + } } if (sh->reconstruct_state == reconstruct_state_drain_run) @@ -3281,7 +3293,7 @@ static void handle_stripe5(struct stripe_head *sh) if (dec_preread_active) { /* We delay this until after ops_run_io so that if make_request - * is waiting on a barrier, it won't continue until the writes + * is waiting on a flush, it won't continue until the writes * have actually been submitted. */ atomic_dec(&conf->preread_active_stripes); @@ -3583,7 +3595,7 @@ static void handle_stripe6(struct stripe_head *sh) if (dec_preread_active) { /* We delay this until after ops_run_io so that if make_request - * is waiting on a barrier, it won't continue until the writes + * is waiting on a flush, it won't continue until the writes * have actually been submitted. */ atomic_dec(&conf->preread_active_stripes); @@ -3978,14 +3990,8 @@ static int make_request(mddev_t *mddev, struct bio * bi) const int rw = bio_data_dir(bi); int remaining; - if (unlikely(bi->bi_rw & REQ_HARDBARRIER)) { - /* Drain all pending writes. 
We only really need - * to ensure they have been submitted, but this is - * easier. - */ - mddev->pers->quiesce(mddev, 1); - mddev->pers->quiesce(mddev, 0); - md_barrier_request(mddev, bi); + if (unlikely(bi->bi_rw & REQ_FLUSH)) { + md_flush_request(mddev, bi); return 0; } @@ -4103,7 +4109,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) finish_wait(&conf->wait_for_overlap, &w); set_bit(STRIPE_HANDLE, &sh->state); clear_bit(STRIPE_DELAYED, &sh->state); - if (mddev->barrier && + if ((bi->bi_rw & REQ_SYNC) && !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) atomic_inc(&conf->preread_active_stripes); release_stripe(sh); @@ -4126,13 +4132,6 @@ static int make_request(mddev_t *mddev, struct bio * bi) bio_endio(bi, 0); } - if (mddev->barrier) { - /* We need to wait for the stripes to all be handled. - * So: wait for preread_active_stripes to drop to 0. - */ - wait_event(mddev->thread->wqueue, - atomic_read(&conf->preread_active_stripes) == 0); - } return 0; } diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 36eaed5..2ace058 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -275,6 +275,7 @@ struct r6_state { * filling */ #define R5_Wantdrain 13 /* dev->towrite needs to be drained */ +#define R5_WantFUA 14 /* Write should be FUA */ /* * Write method */ diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c index e876678..9c0b42b 100644 --- a/drivers/mmc/card/queue.c +++ b/drivers/mmc/card/queue.c @@ -128,7 +128,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock mq->req = NULL; blk_queue_prep_rq(mq->queue, mmc_prep_request); - blk_queue_ordered(mq->queue, QUEUE_ORDERED_DRAIN); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue); if (mmc_can_erase(card)) { queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mq->queue); diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index bd2755e..f332c52 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -362,9 +362,8 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr, goto err; } - err = mmc_sd_get_cid(host, host->ocr & ocr, card->raw_cid); - - if (!err) { + if (ocr & R4_MEMORY_PRESENT + && mmc_sd_get_cid(host, host->ocr & ocr, card->raw_cid) == 0) { card->type = MMC_TYPE_SD_COMBO; if (oldcard && (oldcard->type != MMC_TYPE_SD_COMBO || diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c index 5f3a599..87226cd 100644 --- a/drivers/mmc/host/at91_mci.c +++ b/drivers/mmc/host/at91_mci.c @@ -66,6 +66,7 @@ #include #include #include +#include #include diff --git a/drivers/mmc/host/imxmmc.c b/drivers/mmc/host/imxmmc.c index 9a68ff4..5a950b1 100644 --- a/drivers/mmc/host/imxmmc.c +++ b/drivers/mmc/host/imxmmc.c @@ -148,11 +148,12 @@ static int imxmci_start_clock(struct imxmci_host *host) while (delay--) { reg = readw(host->base + MMC_REG_STATUS); - if (reg & STATUS_CARD_BUS_CLK_RUN) + if (reg & STATUS_CARD_BUS_CLK_RUN) { /* Check twice before cut */ reg = readw(host->base + MMC_REG_STATUS); if (reg & STATUS_CARD_BUS_CLK_RUN) return 0; + } if (test_bit(IMXMCI_PEND_STARTED_b, &host->pending_events)) return 0; diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 4a8776f..4526d27 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -2305,7 +2305,6 @@ static int omap_hsmmc_suspend(struct device *dev) int ret = 0; struct platform_device *pdev = to_platform_device(dev); struct omap_hsmmc_host *host = platform_get_drvdata(pdev); - pm_message_t state = PMSG_SUSPEND; /* unused by MMC 
core */ if (host && host->suspended) return 0; @@ -2324,8 +2323,8 @@ static int omap_hsmmc_suspend(struct device *dev) } } cancel_work_sync(&host->mmc_carddetect_work); - mmc_host_enable(host->mmc); ret = mmc_suspend_host(host->mmc); + mmc_host_enable(host->mmc); if (ret == 0) { omap_hsmmc_disable_irq(host); OMAP_HSMMC_WRITE(host->base, HCTL, diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c index 2e16e0a..976330d 100644 --- a/drivers/mmc/host/s3cmci.c +++ b/drivers/mmc/host/s3cmci.c @@ -1600,7 +1600,7 @@ static int __devinit s3cmci_probe(struct platform_device *pdev) host->pio_active = XFER_NONE; #ifdef CONFIG_MMC_S3C_PIODMA - host->dodma = host->pdata->dma; + host->dodma = host->pdata->use_dma; #endif host->mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c index ee7d0a5..69d98e3 100644 --- a/drivers/mmc/host/tmio_mmc.c +++ b/drivers/mmc/host/tmio_mmc.c @@ -164,6 +164,7 @@ tmio_mmc_start_command(struct tmio_mmc_host *host, struct mmc_command *cmd) static void tmio_mmc_pio_irq(struct tmio_mmc_host *host) { struct mmc_data *data = host->data; + void *sg_virt; unsigned short *buf; unsigned int count; unsigned long flags; @@ -173,8 +174,8 @@ static void tmio_mmc_pio_irq(struct tmio_mmc_host *host) return; } - buf = (unsigned short *)(tmio_mmc_kmap_atomic(host, &flags) + - host->sg_off); + sg_virt = tmio_mmc_kmap_atomic(host->sg_ptr, &flags); + buf = (unsigned short *)(sg_virt + host->sg_off); count = host->sg_ptr->length - host->sg_off; if (count > data->blksz) @@ -191,7 +192,7 @@ static void tmio_mmc_pio_irq(struct tmio_mmc_host *host) host->sg_off += count; - tmio_mmc_kunmap_atomic(host, &flags); + tmio_mmc_kunmap_atomic(sg_virt, &flags); if (host->sg_off == host->sg_ptr->length) tmio_mmc_next_sg(host); diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h index 64f7d5d..0fedc78 100644 --- a/drivers/mmc/host/tmio_mmc.h +++ b/drivers/mmc/host/tmio_mmc.h @@ -82,10 +82,7 @@ #define ack_mmc_irqs(host, i) \ do { \ - u32 mask;\ - mask = sd_ctrl_read32((host), CTL_STATUS); \ - mask &= ~((i) & TMIO_MASK_IRQ); \ - sd_ctrl_write32((host), CTL_STATUS, mask); \ + sd_ctrl_write32((host), CTL_STATUS, ~(i)); \ } while (0) @@ -177,19 +174,17 @@ static inline int tmio_mmc_next_sg(struct tmio_mmc_host *host) return --host->sg_len; } -static inline char *tmio_mmc_kmap_atomic(struct tmio_mmc_host *host, +static inline char *tmio_mmc_kmap_atomic(struct scatterlist *sg, unsigned long *flags) { - struct scatterlist *sg = host->sg_ptr; - local_irq_save(*flags); return kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset; } -static inline void tmio_mmc_kunmap_atomic(struct tmio_mmc_host *host, +static inline void tmio_mmc_kunmap_atomic(void *virt, unsigned long *flags) { - kunmap_atomic(sg_page(host->sg_ptr), KM_BIO_SRC_IRQ); + kunmap_atomic(virt, KM_BIO_SRC_IRQ); local_irq_restore(*flags); } diff --git a/drivers/mtd/nand/bf5xx_nand.c b/drivers/mtd/nand/bf5xx_nand.c index a382e3d..6fbeefa 100644 --- a/drivers/mtd/nand/bf5xx_nand.c +++ b/drivers/mtd/nand/bf5xx_nand.c @@ -682,7 +682,6 @@ static int __devinit bf5xx_nand_add_partition(struct bf5xx_nand_info *info) static int __devexit bf5xx_nand_remove(struct platform_device *pdev) { struct bf5xx_nand_info *info = to_nand_info(pdev); - struct mtd_info *mtd = NULL; platform_set_drvdata(pdev, NULL); @@ -690,11 +689,7 @@ static int __devexit bf5xx_nand_remove(struct platform_device *pdev) * and their partitions, then go through freeing the * resources used */ - 
mtd = &info->mtd; - if (mtd) { - nand_release(mtd); - kfree(mtd); - } + nand_release(&info->mtd); peripheral_free_list(bfin_nfc_pin_req); bf5xx_nand_dma_remove(info); @@ -710,7 +705,7 @@ static int bf5xx_nand_scan(struct mtd_info *mtd) struct nand_chip *chip = mtd->priv; int ret; - ret = nand_scan_ident(mtd, 1); + ret = nand_scan_ident(mtd, 1, NULL); if (ret) return ret; diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c index fcf8ceb..b2828e8 100644 --- a/drivers/mtd/nand/mxc_nand.c +++ b/drivers/mtd/nand/mxc_nand.c @@ -67,7 +67,9 @@ #define NFC_V1_V2_CONFIG1_BIG (1 << 5) #define NFC_V1_V2_CONFIG1_RST (1 << 6) #define NFC_V1_V2_CONFIG1_CE (1 << 7) -#define NFC_V1_V2_CONFIG1_ONE_CYCLE (1 << 8) +#define NFC_V2_CONFIG1_ONE_CYCLE (1 << 8) +#define NFC_V2_CONFIG1_PPB(x) (((x) & 0x3) << 9) +#define NFC_V2_CONFIG1_FP_INT (1 << 11) #define NFC_V1_V2_CONFIG2_INT (1 << 15) @@ -402,16 +404,16 @@ static void send_read_id_v1_v2(struct mxc_nand_host *host) /* Wait for operation to complete */ wait_op_done(host, true); + memcpy(host->data_buf, host->main_area0, 16); + if (this->options & NAND_BUSWIDTH_16) { - void __iomem *main_buf = host->main_area0; /* compress the ID info */ - writeb(readb(main_buf + 2), main_buf + 1); - writeb(readb(main_buf + 4), main_buf + 2); - writeb(readb(main_buf + 6), main_buf + 3); - writeb(readb(main_buf + 8), main_buf + 4); - writeb(readb(main_buf + 10), main_buf + 5); + host->data_buf[1] = host->data_buf[2]; + host->data_buf[2] = host->data_buf[4]; + host->data_buf[3] = host->data_buf[6]; + host->data_buf[4] = host->data_buf[8]; + host->data_buf[5] = host->data_buf[10]; } - memcpy(host->data_buf, host->main_area0, 16); } static uint16_t get_dev_status_v3(struct mxc_nand_host *host) @@ -729,27 +731,30 @@ static void preset_v1_v2(struct mtd_info *mtd) { struct nand_chip *nand_chip = mtd->priv; struct mxc_nand_host *host = nand_chip->priv; - uint16_t tmp; - - /* enable interrupt, disable spare enable */ - tmp = readw(NFC_V1_V2_CONFIG1); - tmp &= ~NFC_V1_V2_CONFIG1_INT_MSK; - tmp &= ~NFC_V1_V2_CONFIG1_SP_EN; - if (nand_chip->ecc.mode == NAND_ECC_HW) { - tmp |= NFC_V1_V2_CONFIG1_ECC_EN; - } else { - tmp &= ~NFC_V1_V2_CONFIG1_ECC_EN; - } + uint16_t config1 = 0; + + if (nand_chip->ecc.mode == NAND_ECC_HW) + config1 |= NFC_V1_V2_CONFIG1_ECC_EN; + + if (nfc_is_v21()) + config1 |= NFC_V2_CONFIG1_FP_INT; + + if (!cpu_is_mx21()) + config1 |= NFC_V1_V2_CONFIG1_INT_MSK; if (nfc_is_v21() && mtd->writesize) { + uint16_t pages_per_block = mtd->erasesize / mtd->writesize; + host->eccsize = get_eccsize(mtd); if (host->eccsize == 4) - tmp |= NFC_V2_CONFIG1_ECC_MODE_4; + config1 |= NFC_V2_CONFIG1_ECC_MODE_4; + + config1 |= NFC_V2_CONFIG1_PPB(ffs(pages_per_block) - 6); } else { host->eccsize = 1; } - writew(tmp, NFC_V1_V2_CONFIG1); + writew(config1, NFC_V1_V2_CONFIG1); /* preset operation */ /* Unlock the internal RAM Buffer */ diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c index 4d89f37..4d01cda6 100644 --- a/drivers/mtd/nand/pxa3xx_nand.c +++ b/drivers/mtd/nand/pxa3xx_nand.c @@ -1320,6 +1320,7 @@ static int pxa3xx_nand_probe(struct platform_device *pdev) goto fail_free_irq; } +#ifdef CONFIG_MTD_PARTITIONS if (mtd_has_cmdlinepart()) { static const char *probes[] = { "cmdlinepart", NULL }; struct mtd_partition *parts; @@ -1332,6 +1333,9 @@ static int pxa3xx_nand_probe(struct platform_device *pdev) } return add_mtd_partitions(mtd, pdata->parts, pdata->nr_parts); +#else + return 0; +#endif fail_free_irq: free_irq(irq, info); @@ -1364,7 +1368,9 @@ 
static int pxa3xx_nand_remove(struct platform_device *pdev) platform_set_drvdata(pdev, NULL); del_mtd_device(mtd); +#ifdef CONFIG_MTD_PARTITIONS del_mtd_partitions(mtd); +#endif irq = platform_get_irq(pdev, 0); if (irq >= 0) free_irq(irq, info); diff --git a/drivers/mtd/onenand/samsung.c b/drivers/mtd/onenand/samsung.c index cb443af..a460f1b 100644 --- a/drivers/mtd/onenand/samsung.c +++ b/drivers/mtd/onenand/samsung.c @@ -554,14 +554,13 @@ static int s5pc110_dma_ops(void *dst, void *src, size_t count, int direction) do { status = readl(base + S5PC110_DMA_TRANS_STATUS); + if (status & S5PC110_DMA_TRANS_STATUS_TE) { + writel(S5PC110_DMA_TRANS_CMD_TEC, + base + S5PC110_DMA_TRANS_CMD); + return -EIO; + } } while (!(status & S5PC110_DMA_TRANS_STATUS_TD)); - if (status & S5PC110_DMA_TRANS_STATUS_TE) { - writel(S5PC110_DMA_TRANS_CMD_TEC, base + S5PC110_DMA_TRANS_CMD); - writel(S5PC110_DMA_TRANS_CMD_TDC, base + S5PC110_DMA_TRANS_CMD); - return -EIO; - } - writel(S5PC110_DMA_TRANS_CMD_TDC, base + S5PC110_DMA_TRANS_CMD); return 0; @@ -571,13 +570,12 @@ static int s5pc110_read_bufferram(struct mtd_info *mtd, int area, unsigned char *buffer, int offset, size_t count) { struct onenand_chip *this = mtd->priv; - void __iomem *bufferram; void __iomem *p; void *buf = (void *) buffer; dma_addr_t dma_src, dma_dst; int err; - p = bufferram = this->base + area; + p = this->base + area; if (ONENAND_CURRENT_BUFFERRAM(this)) { if (area == ONENAND_DATARAM) p += this->writesize; @@ -621,7 +619,7 @@ static int s5pc110_read_bufferram(struct mtd_info *mtd, int area, normal: if (count != mtd->writesize) { /* Copy the bufferram to memory to prevent unaligned access */ - memcpy(this->page_buf, bufferram, mtd->writesize); + memcpy(this->page_buf, p, mtd->writesize); p = this->page_buf + offset; } diff --git a/drivers/mtd/ubi/Kconfig.debug b/drivers/mtd/ubi/Kconfig.debug index 2246f15..61f6e5e 100644 --- a/drivers/mtd/ubi/Kconfig.debug +++ b/drivers/mtd/ubi/Kconfig.debug @@ -6,7 +6,7 @@ config MTD_UBI_DEBUG depends on SYSFS depends on MTD_UBI select DEBUG_FS - select KALLSYMS_ALL + select KALLSYMS_ALL if KALLSYMS && DEBUG_KERNEL help This option enables UBI debugging. 
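[Editorial aside, not part of the series.] The s5pc110_dma_ops() change above is the usual fix for a pollable-status hang: the error bit must be tested inside the wait loop, because a transfer error can mean the done bit is never set, so the old post-loop error check was never reached. A minimal sketch of the pattern, using hypothetical register and bit names rather than the driver's own:

	static int poll_transfer_done(void __iomem *base)
	{
		u32 status;

		/* check ERROR on every iteration: if it is set, DONE may
		 * never arrive and waiting for it would spin forever */
		do {
			status = readl(base + REG_STATUS);
			if (status & STATUS_ERROR) {
				/* acknowledge the error and give up */
				writel(CMD_ERROR_ACK, base + REG_CMD);
				return -EIO;
			}
		} while (!(status & STATUS_DONE));

		/* acknowledge normal completion */
		writel(CMD_DONE_ACK, base + REG_CMD);
		return 0;
	}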
diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c index 4dfa6b9..3d2d1a6 100644 --- a/drivers/mtd/ubi/cdev.c +++ b/drivers/mtd/ubi/cdev.c @@ -798,18 +798,18 @@ static int rename_volumes(struct ubi_device *ubi, goto out_free; } - re = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL); - if (!re) { + re1 = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL); + if (!re1) { err = -ENOMEM; ubi_close_volume(desc); goto out_free; } - re->remove = 1; - re->desc = desc; - list_add(&re->list, &rename_list); + re1->remove = 1; + re1->desc = desc; + list_add(&re1->list, &rename_list); dbg_msg("will remove volume %d, name \"%s\"", - re->desc->vol->vol_id, re->desc->vol->name); + re1->desc->vol->vol_id, re1->desc->vol->name); } mutex_lock(&ubi->device_mutex); diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index 372a15a..69b52e9 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -843,7 +843,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, case UBI_COMPAT_DELETE: ubi_msg("\"delete\" compatible internal volume %d:%d" " found, will remove it", vol_id, lnum); - err = add_to_list(si, pnum, ec, &si->corr); + err = add_to_list(si, pnum, ec, &si->erase); if (err) return err; return 0; diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index ee7b1d8..97a4356 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1212,7 +1212,8 @@ int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum) retry: spin_lock(&ubi->wl_lock); e = ubi->lookuptbl[pnum]; - if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub)) { + if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub) || + in_wl_tree(e, &ubi->erroneous)) { spin_unlock(&ubi->wl_lock); return 0; } diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c index c685a55..fa42103 100644 --- a/drivers/net/3c59x.c +++ b/drivers/net/3c59x.c @@ -635,6 +635,9 @@ struct vortex_private { must_free_region:1, /* Flag: if zero, Cardbus owns the I/O region */ large_frames:1, /* accept large frames */ handling_irq:1; /* private in_irq indicator */ + /* {get|set}_wol operations are already serialized by rtnl. + * no additional locking is required for the enable_wol and acpi_set_WOL() + */ int drv_flags; u16 status_enable; u16 intr_enable; @@ -647,7 +650,7 @@ struct vortex_private { u16 io_size; /* Size of PCI region (for release_region) */ /* Serialises access to hardware other than MII and variables below. - * The lock hierarchy is rtnl_lock > lock > mii_lock > window_lock. */ + * The lock hierarchy is rtnl_lock > {lock, mii_lock} > window_lock. */ spinlock_t lock; spinlock_t mii_lock; /* Serialises access to MII */ @@ -1994,10 +1997,9 @@ vortex_error(struct net_device *dev, int status) } } - if (status & RxEarly) { /* Rx early is unused. */ - vortex_rx(dev); + if (status & RxEarly) /* Rx early is unused. */ iowrite16(AckIntr | RxEarly, ioaddr + EL3_CMD); - } + if (status & StatsFull) { /* Empty statistics. 
*/ static int DoneDidThat; if (vortex_debug > 4) @@ -2298,7 +2300,12 @@ vortex_interrupt(int irq, void *dev_id) if (status & (HostError | RxEarly | StatsFull | TxComplete | IntReq)) { if (status == 0xffff) break; + if (status & RxEarly) + vortex_rx(dev); + spin_unlock(&vp->window_lock); vortex_error(dev, status); + spin_lock(&vp->window_lock); + window_set(vp, 7); } if (--work_done < 0) { @@ -2935,13 +2942,11 @@ static void vortex_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct vortex_private *vp = netdev_priv(dev); - spin_lock_irq(&vp->lock); wol->supported = WAKE_MAGIC; wol->wolopts = 0; if (vp->enable_wol) wol->wolopts |= WAKE_MAGIC; - spin_unlock_irq(&vp->lock); } static int vortex_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) @@ -2950,13 +2955,11 @@ static int vortex_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) if (wol->wolopts & ~WAKE_MAGIC) return -EINVAL; - spin_lock_irq(&vp->lock); if (wol->wolopts & WAKE_MAGIC) vp->enable_wol = 1; else vp->enable_wol = 0; acpi_set_WOL(dev); - spin_unlock_irq(&vp->lock); return 0; } @@ -2984,7 +2987,6 @@ static int vortex_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { int err; struct vortex_private *vp = netdev_priv(dev); - unsigned long flags; pci_power_t state = 0; if(VORTEX_PCI(vp)) @@ -2994,9 +2996,7 @@ static int vortex_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if(state != 0) pci_set_power_state(VORTEX_PCI(vp), PCI_D0); - spin_lock_irqsave(&vp->lock, flags); err = generic_mii_ioctl(&vp->mii, if_mii(rq), cmd, NULL); - spin_unlock_irqrestore(&vp->lock, flags); if(state != 0) pci_set_power_state(VORTEX_PCI(vp), state); diff --git a/drivers/net/b44.c b/drivers/net/b44.c index 37617ab..1e620e2 100644 --- a/drivers/net/b44.c +++ b/drivers/net/b44.c @@ -848,6 +848,15 @@ static int b44_poll(struct napi_struct *napi, int budget) b44_tx(bp); /* spin_unlock(&bp->tx_lock); */ } + if (bp->istat & ISTAT_RFO) { /* fast recovery, in ~20msec */ + bp->istat &= ~ISTAT_RFO; + b44_disable_ints(bp); + ssb_device_enable(bp->sdev, 0); /* resets ISTAT_RFO */ + b44_init_rings(bp); + b44_init_hw(bp, B44_FULL_RESET_SKIP_PHY); + netif_wake_queue(bp->dev); + } + spin_unlock_irqrestore(&bp->lock, flags); work_done = 0; diff --git a/drivers/net/benet/be.h b/drivers/net/benet/be.h index 99197bd..53306bf 100644 --- a/drivers/net/benet/be.h +++ b/drivers/net/benet/be.h @@ -181,6 +181,7 @@ struct be_drvr_stats { u64 be_rx_bytes_prev; u64 be_rx_pkts; u32 be_rx_rate; + u32 be_rx_mcast_pkt; /* number of non ether type II frames dropped where * frame len > length field of Mac Hdr */ u32 be_802_3_dropped_frames; diff --git a/drivers/net/benet/be_cmds.c b/drivers/net/benet/be_cmds.c index 3d30549..34abcc9 100644 --- a/drivers/net/benet/be_cmds.c +++ b/drivers/net/benet/be_cmds.c @@ -140,10 +140,8 @@ int be_process_mcc(struct be_adapter *adapter, int *status) while ((compl = be_mcc_compl_get(adapter))) { if (compl->flags & CQE_FLAGS_ASYNC_MASK) { /* Interpret flags as an async trailer */ - BUG_ON(!is_link_state_evt(compl->flags)); - - /* Interpret compl as a async link evt */ - be_async_link_state_process(adapter, + if (is_link_state_evt(compl->flags)) + be_async_link_state_process(adapter, (struct be_async_event_link_state *) compl); } else if (compl->flags & CQE_FLAGS_COMPLETED_MASK) { *status = be_mcc_compl_process(adapter, compl); @@ -207,7 +205,7 @@ static int be_mbox_db_ready_wait(struct be_adapter *adapter, void __iomem *db) if (msecs > 4000) { dev_err(&adapter->pdev->dev, "mbox poll timed out\n"); 
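		/* [Editorial note, not part of the patch.] be_detect_ue() and
		 * be_dump_ue() are folded into a single helper below, so this
		 * timeout path both detects and logs any unrecoverable error
		 * with one be_detect_dump_ue() call. */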
- be_dump_ue(adapter); + be_detect_dump_ue(adapter); return -1; } diff --git a/drivers/net/benet/be_cmds.h b/drivers/net/benet/be_cmds.h index bdc10a2..ad1e6fa 100644 --- a/drivers/net/benet/be_cmds.h +++ b/drivers/net/benet/be_cmds.h @@ -992,5 +992,5 @@ extern int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num, extern int be_cmd_get_phy_info(struct be_adapter *adapter, struct be_dma_mem *cmd); extern int be_cmd_set_qos(struct be_adapter *adapter, u32 bps, u32 domain); -extern void be_dump_ue(struct be_adapter *adapter); +extern void be_detect_dump_ue(struct be_adapter *adapter); diff --git a/drivers/net/benet/be_ethtool.c b/drivers/net/benet/be_ethtool.c index cd16243..13f0abb 100644 --- a/drivers/net/benet/be_ethtool.c +++ b/drivers/net/benet/be_ethtool.c @@ -60,6 +60,7 @@ static const struct be_ethtool_stat et_stats[] = { {DRVSTAT_INFO(be_rx_events)}, {DRVSTAT_INFO(be_tx_compl)}, {DRVSTAT_INFO(be_rx_compl)}, + {DRVSTAT_INFO(be_rx_mcast_pkt)}, {DRVSTAT_INFO(be_ethrx_post_fail)}, {DRVSTAT_INFO(be_802_3_dropped_frames)}, {DRVSTAT_INFO(be_802_3_malformed_frames)}, diff --git a/drivers/net/benet/be_hw.h b/drivers/net/benet/be_hw.h index 5d38046..a2ec5df 100644 --- a/drivers/net/benet/be_hw.h +++ b/drivers/net/benet/be_hw.h @@ -167,8 +167,11 @@ #define FLASH_FCoE_BIOS_START_g3 (13631488) #define FLASH_REDBOOT_START_g3 (262144) - - +/************* Rx Packet Type Encoding **************/ +#define BE_UNICAST_PACKET 0 +#define BE_MULTICAST_PACKET 1 +#define BE_BROADCAST_PACKET 2 +#define BE_RSVD_PACKET 3 /* * BE descriptors: host memory data structures whose formats diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c index 74e146f..6eda7a0 100644 --- a/drivers/net/benet/be_main.c +++ b/drivers/net/benet/be_main.c @@ -247,6 +247,7 @@ void netdev_stats_update(struct be_adapter *adapter) dev_stats->tx_packets = drvr_stats(adapter)->be_tx_pkts; dev_stats->rx_bytes = drvr_stats(adapter)->be_rx_bytes; dev_stats->tx_bytes = drvr_stats(adapter)->be_tx_bytes; + dev_stats->multicast = drvr_stats(adapter)->be_rx_mcast_pkt; /* bad pkts received */ dev_stats->rx_errors = port_stats->rx_crc_errors + @@ -294,7 +295,6 @@ void netdev_stats_update(struct be_adapter *adapter) /* no space available in linux */ dev_stats->tx_dropped = 0; - dev_stats->multicast = port_stats->rx_multicast_frames; dev_stats->collisions = 0; /* detailed tx_errors */ @@ -848,7 +848,7 @@ static void be_rx_rate_update(struct be_adapter *adapter) } static void be_rx_stats_update(struct be_adapter *adapter, - u32 pktsize, u16 numfrags) + u32 pktsize, u16 numfrags, u8 pkt_type) { struct be_drvr_stats *stats = drvr_stats(adapter); @@ -856,6 +856,9 @@ static void be_rx_stats_update(struct be_adapter *adapter, stats->be_rx_frags += numfrags; stats->be_rx_bytes += pktsize; stats->be_rx_pkts++; + + if (pkt_type == BE_MULTICAST_PACKET) + stats->be_rx_mcast_pkt++; } static inline bool do_pkt_csum(struct be_eth_rx_compl *rxcp, bool cso) @@ -925,9 +928,11 @@ static void skb_fill_rx_data(struct be_adapter *adapter, u16 rxq_idx, i, j; u32 pktsize, hdr_len, curr_frag_len, size; u8 *start; + u8 pkt_type; rxq_idx = AMAP_GET_BITS(struct amap_eth_rx_compl, fragndx, rxcp); pktsize = AMAP_GET_BITS(struct amap_eth_rx_compl, pktsize, rxcp); + pkt_type = AMAP_GET_BITS(struct amap_eth_rx_compl, cast_enc, rxcp); page_info = get_rx_page_info(adapter, rxq_idx); @@ -993,7 +998,7 @@ static void skb_fill_rx_data(struct be_adapter *adapter, BUG_ON(j > MAX_SKB_FRAGS); done: - be_rx_stats_update(adapter, pktsize, num_rcvd); + 
be_rx_stats_update(adapter, pktsize, num_rcvd, pkt_type); } /* Process the RX completion indicated by rxcp when GRO is disabled */ @@ -1060,6 +1065,7 @@ static void be_rx_compl_process_gro(struct be_adapter *adapter, u32 num_rcvd, pkt_size, remaining, vlanf, curr_frag_len; u16 i, rxq_idx = 0, vid, j; u8 vtm; + u8 pkt_type; num_rcvd = AMAP_GET_BITS(struct amap_eth_rx_compl, numfrags, rxcp); /* Is it a flush compl that has no data */ @@ -1070,6 +1076,7 @@ static void be_rx_compl_process_gro(struct be_adapter *adapter, vlanf = AMAP_GET_BITS(struct amap_eth_rx_compl, vtp, rxcp); rxq_idx = AMAP_GET_BITS(struct amap_eth_rx_compl, fragndx, rxcp); vtm = AMAP_GET_BITS(struct amap_eth_rx_compl, vtm, rxcp); + pkt_type = AMAP_GET_BITS(struct amap_eth_rx_compl, cast_enc, rxcp); /* vlanf could be wrongly set in some cards. * ignore if vtm is not set */ @@ -1125,7 +1132,7 @@ static void be_rx_compl_process_gro(struct be_adapter *adapter, vlan_gro_frags(&eq_obj->napi, adapter->vlan_grp, vid); } - be_rx_stats_update(adapter, pkt_size, num_rcvd); + be_rx_stats_update(adapter, pkt_size, num_rcvd, pkt_type); } static struct be_eth_rx_compl *be_rx_compl_get(struct be_adapter *adapter) @@ -1743,26 +1750,7 @@ static int be_poll_tx_mcc(struct napi_struct *napi, int budget) return 1; } -static inline bool be_detect_ue(struct be_adapter *adapter) -{ - u32 online0 = 0, online1 = 0; - - pci_read_config_dword(adapter->pdev, PCICFG_ONLINE0, &online0); - - pci_read_config_dword(adapter->pdev, PCICFG_ONLINE1, &online1); - - if (!online0 || !online1) { - adapter->ue_detected = true; - dev_err(&adapter->pdev->dev, - "UE Detected!! online0=%d online1=%d\n", - online0, online1); - return true; - } - - return false; -} - -void be_dump_ue(struct be_adapter *adapter) +void be_detect_dump_ue(struct be_adapter *adapter) { u32 ue_status_lo, ue_status_hi, ue_status_lo_mask, ue_status_hi_mask; u32 i; @@ -1779,6 +1767,11 @@ void be_dump_ue(struct be_adapter *adapter) ue_status_lo = (ue_status_lo & (~ue_status_lo_mask)); ue_status_hi = (ue_status_hi & (~ue_status_hi_mask)); + if (ue_status_lo || ue_status_hi) { + adapter->ue_detected = true; + dev_err(&adapter->pdev->dev, "UE Detected!!\n"); + } + if (ue_status_lo) { for (i = 0; ue_status_lo; ue_status_lo >>= 1, i++) { if (ue_status_lo & 1) @@ -1814,10 +1807,8 @@ static void be_worker(struct work_struct *work) adapter->rx_post_starved = false; be_post_rx_frags(adapter); } - if (!adapter->ue_detected) { - if (be_detect_ue(adapter)) - be_dump_ue(adapter); - } + if (!adapter->ue_detected) + be_detect_dump_ue(adapter); schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000)); } diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 822f586..0ddf4c6 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2466,6 +2466,9 @@ int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct pac if (!(dev->flags & IFF_MASTER)) goto out; + if (!pskb_may_pull(skb, sizeof(struct lacpdu))) + goto out; + read_lock(&bond->lock); slave = bond_get_slave_by_dev((struct bonding *)netdev_priv(dev), orig_dev); diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index c746b33..26bb118 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -362,6 +362,9 @@ static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct goto out; } + if (!pskb_may_pull(skb, arp_hdr_len(bond_dev))) + goto out; + if (skb->len < sizeof(struct arp_pkt)) { pr_debug("Packet is too small to 
be an ARP\n"); goto out; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 2cc4cfc..3b16f62 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2797,9 +2797,15 @@ void bond_loadbalance_arp_mon(struct work_struct *work) * so it can wait */ bond_for_each_slave(bond, slave, i) { + unsigned long trans_start = dev_trans_start(slave->dev); + if (slave->link != BOND_LINK_UP) { - if (time_before_eq(jiffies, dev_trans_start(slave->dev) + delta_in_ticks) && - time_before_eq(jiffies, slave->dev->last_rx + delta_in_ticks)) { + if (time_in_range(jiffies, + trans_start - delta_in_ticks, + trans_start + delta_in_ticks) && + time_in_range(jiffies, + slave->dev->last_rx - delta_in_ticks, + slave->dev->last_rx + delta_in_ticks)) { slave->link = BOND_LINK_UP; slave->state = BOND_STATE_ACTIVE; @@ -2827,8 +2833,12 @@ void bond_loadbalance_arp_mon(struct work_struct *work) * when the source ip is 0, so don't take the link down * if we don't know our ip yet */ - if (time_after_eq(jiffies, dev_trans_start(slave->dev) + 2*delta_in_ticks) || - (time_after_eq(jiffies, slave->dev->last_rx + 2*delta_in_ticks))) { + if (!time_in_range(jiffies, + trans_start - delta_in_ticks, + trans_start + 2 * delta_in_ticks) || + !time_in_range(jiffies, + slave->dev->last_rx - delta_in_ticks, + slave->dev->last_rx + 2 * delta_in_ticks)) { slave->link = BOND_LINK_DOWN; slave->state = BOND_STATE_BACKUP; @@ -2883,13 +2893,16 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) { struct slave *slave; int i, commit = 0; + unsigned long trans_start; bond_for_each_slave(bond, slave, i) { slave->new_link = BOND_LINK_NOCHANGE; if (slave->link != BOND_LINK_UP) { - if (time_before_eq(jiffies, slave_last_rx(bond, slave) + - delta_in_ticks)) { + if (time_in_range(jiffies, + slave_last_rx(bond, slave) - delta_in_ticks, + slave_last_rx(bond, slave) + delta_in_ticks)) { + slave->new_link = BOND_LINK_UP; commit++; } @@ -2902,8 +2915,9 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) * active. This avoids bouncing, as the last receive * times need a full ARP monitor cycle to be updated. 
*/ - if (!time_after_eq(jiffies, slave->jiffies + - 2 * delta_in_ticks)) + if (time_in_range(jiffies, + slave->jiffies - delta_in_ticks, + slave->jiffies + 2 * delta_in_ticks)) continue; /* @@ -2921,8 +2935,10 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) */ if (slave->state == BOND_STATE_BACKUP && !bond->current_arp_slave && - time_after(jiffies, slave_last_rx(bond, slave) + - 3 * delta_in_ticks)) { + !time_in_range(jiffies, + slave_last_rx(bond, slave) - delta_in_ticks, + slave_last_rx(bond, slave) + 3 * delta_in_ticks)) { + slave->new_link = BOND_LINK_DOWN; commit++; } @@ -2933,11 +2949,15 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) * - (more than 2*delta since receive AND * the bond has an IP address) */ + trans_start = dev_trans_start(slave->dev); if ((slave->state == BOND_STATE_ACTIVE) && - (time_after_eq(jiffies, dev_trans_start(slave->dev) + - 2 * delta_in_ticks) || - (time_after_eq(jiffies, slave_last_rx(bond, slave) - + 2 * delta_in_ticks)))) { + (!time_in_range(jiffies, + trans_start - delta_in_ticks, + trans_start + 2 * delta_in_ticks) || + !time_in_range(jiffies, + slave_last_rx(bond, slave) - delta_in_ticks, + slave_last_rx(bond, slave) + 2 * delta_in_ticks))) { + slave->new_link = BOND_LINK_DOWN; commit++; } @@ -2956,6 +2976,7 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks) { struct slave *slave; int i; + unsigned long trans_start; bond_for_each_slave(bond, slave, i) { switch (slave->new_link) { @@ -2963,10 +2984,11 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks) continue; case BOND_LINK_UP: + trans_start = dev_trans_start(slave->dev); if ((!bond->curr_active_slave && - time_before_eq(jiffies, - dev_trans_start(slave->dev) + - delta_in_ticks)) || + time_in_range(jiffies, + trans_start - delta_in_ticks, + trans_start + delta_in_ticks)) || bond->curr_active_slave != slave) { slave->link = BOND_LINK_UP; bond->current_arp_slave = NULL; diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c index ad19585..f208712 100644 --- a/drivers/net/cxgb3/cxgb3_main.c +++ b/drivers/net/cxgb3/cxgb3_main.c @@ -2296,6 +2296,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) case CHELSIO_GET_QSET_NUM:{ struct ch_reg edata; + memset(&edata, 0, sizeof(struct ch_reg)); + edata.cmd = CHELSIO_GET_QSET_NUM; edata.val = pi->nqsets; if (copy_to_user(useraddr, &edata, sizeof(edata))) diff --git a/drivers/net/eql.c b/drivers/net/eql.c index dda2c79..0cb1cf9 100644 --- a/drivers/net/eql.c +++ b/drivers/net/eql.c @@ -555,6 +555,8 @@ static int eql_g_master_cfg(struct net_device *dev, master_config_t __user *mcp) equalizer_t *eql; master_config_t mc; + memset(&mc, 0, sizeof(master_config_t)); + if (eql_is_master(dev)) { eql = netdev_priv(dev); mc.max_slaves = eql->max_slaves; diff --git a/drivers/net/ks8851.c b/drivers/net/ks8851.c index b4fb07a..51919fc 100644 --- a/drivers/net/ks8851.c +++ b/drivers/net/ks8851.c @@ -503,30 +503,33 @@ static void ks8851_rx_pkts(struct ks8851_net *ks) ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA | RXQCR_ADRFE); - if (rxlen > 0) { - skb = netdev_alloc_skb(ks->netdev, rxlen + 2 + 8); - if (!skb) { - /* todo - dump frame and move on */ - } + if (rxlen > 4) { + unsigned int rxalign; + + rxlen -= 4; + rxalign = ALIGN(rxlen, 4); + skb = netdev_alloc_skb_ip_align(ks->netdev, rxalign); + if (skb) { - /* two bytes to ensure ip is aligned, and four bytes - * for the status header and 4 bytes of garbage */ 
- skb_reserve(skb, 2 + 4 + 4); + /* 4 bytes of status header + 4 bytes of + * garbage: we put them before ethernet + * header, so that they are copied, + * but ignored. + */ - rxpkt = skb_put(skb, rxlen - 4) - 8; + rxpkt = skb_put(skb, rxlen) - 8; - /* align the packet length to 4 bytes, and add 4 bytes - * as we're getting the rx status header as well */ - ks8851_rdfifo(ks, rxpkt, ALIGN(rxlen, 4) + 8); + ks8851_rdfifo(ks, rxpkt, rxalign + 8); - if (netif_msg_pktdata(ks)) - ks8851_dbg_dumpkkt(ks, rxpkt); + if (netif_msg_pktdata(ks)) + ks8851_dbg_dumpkkt(ks, rxpkt); - skb->protocol = eth_type_trans(skb, ks->netdev); - netif_rx(skb); + skb->protocol = eth_type_trans(skb, ks->netdev); + netif_rx(skb); - ks->netdev->stats.rx_packets++; - ks->netdev->stats.rx_bytes += rxlen - 4; + ks->netdev->stats.rx_packets++; + ks->netdev->stats.rx_bytes += rxlen; + } } ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); diff --git a/drivers/net/ll_temac_main.c b/drivers/net/ll_temac_main.c index bdf2149..87f0a93 100644 --- a/drivers/net/ll_temac_main.c +++ b/drivers/net/ll_temac_main.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include /* needed for sizeof(tcphdr) */ diff --git a/drivers/net/ll_temac_mdio.c b/drivers/net/ll_temac_mdio.c index 5ae28c9..8cf9d4f 100644 --- a/drivers/net/ll_temac_mdio.c +++ b/drivers/net/ll_temac_mdio.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/drivers/net/niu.c b/drivers/net/niu.c index bc695d5..fe6983a 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -7269,32 +7269,28 @@ static int niu_get_ethtool_tcam_all(struct niu *np, struct niu_parent *parent = np->parent; struct niu_tcam_entry *tp; int i, idx, cnt; - u16 n_entries; unsigned long flags; - + int ret = 0; /* put the tcam size here */ nfc->data = tcam_get_size(np); niu_lock_parent(np, flags); - n_entries = nfc->rule_cnt; for (cnt = 0, i = 0; i < nfc->data; i++) { idx = tcam_get_index(np, i); tp = &parent->tcam[idx]; if (!tp->valid) continue; + if (cnt == nfc->rule_cnt) { + ret = -EMSGSIZE; + break; + } rule_locs[cnt] = i; cnt++; } niu_unlock_parent(np, flags); - if (n_entries != cnt) { - /* print warning, this should not happen */ - netdev_info(np->dev, "niu%d: In %s(): n_entries[%d] != cnt[%d]!!!\n", - np->parent->index, __func__, n_entries, cnt); - } - - return 0; + return ret; } static int niu_get_nfc(struct net_device *dev, struct ethtool_rxnfc *cmd, diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c index c3edfe4..f9b509a 100644 --- a/drivers/net/pcmcia/pcnet_cs.c +++ b/drivers/net/pcmcia/pcnet_cs.c @@ -508,7 +508,8 @@ static int pcnet_confcheck(struct pcmcia_device *p_dev, unsigned int vcc, void *priv_data) { - int *has_shmem = priv_data; + int *priv = priv_data; + int try = (*priv & 0x1); int i; cistpl_io_t *io = &cfg->io; @@ -525,77 +526,103 @@ static int pcnet_confcheck(struct pcmcia_device *p_dev, i = p_dev->resource[1]->end = 0; } - *has_shmem = ((cfg->mem.nwin == 1) && - (cfg->mem.win[0].len >= 0x4000)); + *priv &= ((cfg->mem.nwin == 1) && + (cfg->mem.win[0].len >= 0x4000)) ? 
0x10 : ~0x10; + p_dev->resource[0]->start = io->win[i].base; p_dev->resource[0]->end = io->win[i].len; - p_dev->io_lines = io->flags & CISTPL_IO_LINES_MASK; + if (!try) + p_dev->io_lines = io->flags & CISTPL_IO_LINES_MASK; + else + p_dev->io_lines = 16; if (p_dev->resource[0]->end + p_dev->resource[1]->end >= 32) return try_io_port(p_dev); - return 0; + return -EINVAL; +} + +static hw_info_t *pcnet_try_config(struct pcmcia_device *link, + int *has_shmem, int try) +{ + struct net_device *dev = link->priv; + hw_info_t *local_hw_info; + pcnet_dev_t *info = PRIV(dev); + int priv = try; + int ret; + + ret = pcmcia_loop_config(link, pcnet_confcheck, &priv); + if (ret) { + dev_warn(&link->dev, "no useable port range found\n"); + return NULL; + } + *has_shmem = (priv & 0x10); + + if (!link->irq) + return NULL; + + if (resource_size(link->resource[1]) == 8) { + link->conf.Attributes |= CONF_ENABLE_SPKR; + link->conf.Status = CCSR_AUDIO_ENA; + } + if ((link->manf_id == MANFID_IBM) && + (link->card_id == PRODID_IBM_HOME_AND_AWAY)) + link->conf.ConfigIndex |= 0x10; + + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + return NULL; + + dev->irq = link->irq; + dev->base_addr = link->resource[0]->start; + + if (info->flags & HAS_MISC_REG) { + if ((if_port == 1) || (if_port == 2)) + dev->if_port = if_port; + else + dev_notice(&link->dev, "invalid if_port requested\n"); + } else + dev->if_port = 0; + + if ((link->conf.ConfigBase == 0x03c0) && + (link->manf_id == 0x149) && (link->card_id == 0xc1ab)) { + dev_info(&link->dev, + "this is an AX88190 card - use axnet_cs instead.\n"); + return NULL; + } + + local_hw_info = get_hwinfo(link); + if (!local_hw_info) + local_hw_info = get_prom(link); + if (!local_hw_info) + local_hw_info = get_dl10019(link); + if (!local_hw_info) + local_hw_info = get_ax88190(link); + if (!local_hw_info) + local_hw_info = get_hwired(link); + + return local_hw_info; } static int pcnet_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; pcnet_dev_t *info = PRIV(dev); - int ret, start_pg, stop_pg, cm_offset; + int start_pg, stop_pg, cm_offset; int has_shmem = 0; hw_info_t *local_hw_info; dev_dbg(&link->dev, "pcnet_config\n"); - ret = pcmcia_loop_config(link, pcnet_confcheck, &has_shmem); - if (ret) - goto failed; - - if (!link->irq) - goto failed; - - if (resource_size(link->resource[1]) == 8) { - link->conf.Attributes |= CONF_ENABLE_SPKR; - link->conf.Status = CCSR_AUDIO_ENA; - } - if ((link->manf_id == MANFID_IBM) && - (link->card_id == PRODID_IBM_HOME_AND_AWAY)) - link->conf.ConfigIndex |= 0x10; - - ret = pcmcia_request_configuration(link, &link->conf); - if (ret) - goto failed; - dev->irq = link->irq; - dev->base_addr = link->resource[0]->start; - if (info->flags & HAS_MISC_REG) { - if ((if_port == 1) || (if_port == 2)) - dev->if_port = if_port; - else - printk(KERN_NOTICE "pcnet_cs: invalid if_port requested\n"); - } else { - dev->if_port = 0; - } - - if ((link->conf.ConfigBase == 0x03c0) && - (link->manf_id == 0x149) && (link->card_id == 0xc1ab)) { - printk(KERN_INFO "pcnet_cs: this is an AX88190 card!\n"); - printk(KERN_INFO "pcnet_cs: use axnet_cs instead.\n"); - goto failed; - } - - local_hw_info = get_hwinfo(link); - if (local_hw_info == NULL) - local_hw_info = get_prom(link); - if (local_hw_info == NULL) - local_hw_info = get_dl10019(link); - if (local_hw_info == NULL) - local_hw_info = get_ax88190(link); - if (local_hw_info == NULL) - local_hw_info = get_hwired(link); - - if (local_hw_info == NULL) { - printk(KERN_NOTICE "pcnet_cs: 
unable to read hardware net" - " address for io base %#3lx\n", dev->base_addr); - goto failed; + local_hw_info = pcnet_try_config(link, &has_shmem, 0); + if (!local_hw_info) { + /* check whether forcing io_lines to 16 helps... */ + pcmcia_disable_device(link); + local_hw_info = pcnet_try_config(link, &has_shmem, 1); + if (local_hw_info == NULL) { + dev_notice(&link->dev, "unable to read hardware net" + " address for io base %#3lx\n", dev->base_addr); + goto failed; + } } info->flags = local_hw_info->flags; @@ -1637,6 +1664,7 @@ static struct pcmcia_device_id pcnet_ids[] = { PCMCIA_DEVICE_PROD_ID12("IO DATA", "PCETTX", 0x547e66dc, 0x6fc5459b), PCMCIA_DEVICE_PROD_ID12("iPort", "10/100 Ethernet Card", 0x56c538d2, 0x11b0ffc0), PCMCIA_DEVICE_PROD_ID12("KANSAI ELECTRIC CO.,LTD", "KLA-PCM/T", 0xb18dc3b4, 0xcc51a956), + PCMCIA_DEVICE_PROD_ID12("KENTRONICS", "KEP-230", 0xaf8144c9, 0x868f6616), PCMCIA_DEVICE_PROD_ID12("KCI", "PE520 PCMCIA Ethernet Adapter", 0xa89b87d3, 0x1eb88e64), PCMCIA_DEVICE_PROD_ID12("KINGMAX", "EN10T2T", 0x7bcb459a, 0xa5c81fa5), PCMCIA_DEVICE_PROD_ID12("Kingston", "KNE-PC2", 0x1128e633, 0xce2a89b3), diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 6a6b819..6c58da2 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -308,7 +308,7 @@ static int mdio_bus_suspend(struct device *dev) * may call phy routines that try to grab the same lock, and that may * lead to a deadlock. */ - if (phydev->attached_dev) + if (phydev->attached_dev && phydev->adjust_link) phy_stop_machine(phydev); if (!mdio_bus_phy_may_suspend(phydev)) @@ -331,7 +331,7 @@ static int mdio_bus_resume(struct device *dev) return ret; no_resume: - if (phydev->attached_dev) + if (phydev->attached_dev && phydev->adjust_link) phy_start_machine(phydev, NULL); return 0; diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index 6695a51..736b917 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -1314,8 +1314,13 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb) hdrlen = (ppp->flags & SC_MP_XSHORTSEQ)? 
MPHDRLEN_SSN: MPHDRLEN; i = 0; list_for_each_entry(pch, &ppp->channels, clist) { - navail += pch->avail = (pch->chan != NULL); - pch->speed = pch->chan->speed; + if (pch->chan) { + pch->avail = 1; + navail++; + pch->speed = pch->chan->speed; + } else { + pch->avail = 0; + } if (pch->avail) { if (skb_queue_empty(&pch->file.xq) || !pch->had_frag) { diff --git a/drivers/net/pxa168_eth.c b/drivers/net/pxa168_eth.c index 410ea0a..85eddda 100644 --- a/drivers/net/pxa168_eth.c +++ b/drivers/net/pxa168_eth.c @@ -1606,6 +1606,8 @@ static int pxa168_eth_remove(struct platform_device *pdev) iounmap(pep->base); pep->base = NULL; + mdiobus_unregister(pep->smi_bus); + mdiobus_free(pep->smi_bus); unregister_netdev(dev); flush_scheduled_work(); free_netdev(dev); diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 078bbf4..a0da4a1 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -2934,7 +2934,7 @@ static const struct rtl_cfg_info { .hw_start = rtl_hw_start_8168, .region = 2, .align = 8, - .intr_event = SYSErr | LinkChg | RxOverflow | + .intr_event = SYSErr | RxFIFOOver | LinkChg | RxOverflow | TxErr | TxOK | RxOK | RxErr, .napi_event = TxErr | TxOK | RxOK | RxOverflow, .features = RTL_FEATURE_GMII | RTL_FEATURE_MSI, @@ -4625,8 +4625,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) } /* Work around for rx fifo overflow */ - if (unlikely(status & RxFIFOOver) && - (tp->mac_version == RTL_GIGA_MAC_VER_11)) { + if (unlikely(status & RxFIFOOver)) { netif_stop_queue(dev); rtl8169_tx_timeout(dev); break; diff --git a/drivers/net/stmmac/stmmac_main.c b/drivers/net/stmmac/stmmac_main.c index bbb7951..ea0461e 100644 --- a/drivers/net/stmmac/stmmac_main.c +++ b/drivers/net/stmmac/stmmac_main.c @@ -1865,15 +1865,15 @@ static int stmmac_resume(struct platform_device *pdev) if (!netif_running(dev)) return 0; - spin_lock(&priv->lock); - if (priv->shutdown) { /* Re-open the interface and re-init the MAC/DMA - and the rings. */ + and the rings (i.e. on hibernation stage) */ stmmac_open(dev); - goto out_resume; + return 0; } + spin_lock(&priv->lock); + /* Power Down bit, into the PM register, is cleared * automatically as soon as a magic packet or a Wake-up frame * is received. 
Anyway, it's better to manually clear @@ -1901,7 +1901,6 @@ static int stmmac_resume(struct platform_device *pdev) netif_start_queue(dev); -out_resume: spin_unlock(&priv->lock); return 0; } diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 6efca66..1cd752f 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -1652,6 +1652,8 @@ static int hso_get_count(struct hso_serial *serial, struct uart_icount cnow; struct hso_tiocmget *tiocmget = serial->tiocmget; + memset(&icount, 0, sizeof(struct serial_icounter_struct)); + if (!tiocmget) return -ENOENT; spin_lock_irq(&serial->serial_lock); diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 8ed30fa..b2bcf99 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -429,10 +429,6 @@ static const struct net_device_ops ipheth_netdev_ops = { .ndo_get_stats = &ipheth_stats, }; -static struct device_type ipheth_type = { - .name = "wwan", -}; - static int ipheth_probe(struct usb_interface *intf, const struct usb_device_id *id) { @@ -450,7 +446,7 @@ static int ipheth_probe(struct usb_interface *intf, netdev->netdev_ops = &ipheth_netdev_ops; netdev->watchdog_timeo = IPHETH_TX_TIMEOUT; - strcpy(netdev->name, "wwan%d"); + strcpy(netdev->name, "eth%d"); dev = netdev_priv(netdev); dev->udev = udev; @@ -500,7 +496,6 @@ static int ipheth_probe(struct usb_interface *intf, SET_NETDEV_DEV(netdev, &intf->dev); SET_ETHTOOL_OPS(netdev, &ops); - SET_NETDEV_DEVTYPE(netdev, &ipheth_type); retval = register_netdev(netdev); if (retval) { diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c index fd69095..f534123 100644 --- a/drivers/net/via-velocity.c +++ b/drivers/net/via-velocity.c @@ -2824,7 +2824,7 @@ static int __devinit velocity_found1(struct pci_dev *pdev, const struct pci_devi netif_napi_add(dev, &vptr->napi, velocity_poll, VELOCITY_NAPI_WEIGHT); dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_FILTER | - NETIF_F_HW_VLAN_RX | NETIF_F_IP_CSUM | NETIF_F_SG; + NETIF_F_HW_VLAN_RX | NETIF_F_IP_CSUM; ret = register_netdev(dev); if (ret < 0) diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index 373dcfe..d77ce99 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -1327,6 +1327,10 @@ ath5k_txbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf, PCI_DMA_TODEVICE); rate = ieee80211_get_tx_rate(sc->hw, info); + if (!rate) { + ret = -EINVAL; + goto err_unmap; + } if (info->flags & IEEE80211_TX_CTL_NO_ACK) flags |= AR5K_TXDESC_NOACK; diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c index b883b17..057fb69 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c @@ -797,7 +797,7 @@ static bool ar9300_uncompress_block(struct ath_hw *ah, length = block[it+1]; length &= 0xff; - if (length > 0 && spot >= 0 && spot+length < mdataSize) { + if (length > 0 && spot >= 0 && spot+length <= mdataSize) { ath_print(common, ATH_DBG_EEPROM, "Restore at %d: spot=%d " "offset=%d length=%d\n", diff --git a/drivers/net/wireless/ath/ath9k/eeprom.h b/drivers/net/wireless/ath/ath9k/eeprom.h index 7f48df1..0b09db0 100644 --- a/drivers/net/wireless/ath/ath9k/eeprom.h +++ b/drivers/net/wireless/ath/ath9k/eeprom.h @@ -62,7 +62,7 @@ #define SD_NO_CTL 0xE0 #define NO_CTL 0xff -#define CTL_MODE_M 7 +#define CTL_MODE_M 0xf #define CTL_11A 0 #define CTL_11B 1 #define CTL_11G 2 diff --git a/drivers/net/wireless/ath/regd.h 
b/drivers/net/wireless/ath/regd.h index a1c3952..345dd97 100644 --- a/drivers/net/wireless/ath/regd.h +++ b/drivers/net/wireless/ath/regd.h @@ -31,7 +31,6 @@ enum ctl_group { #define NO_CTL 0xff #define SD_NO_CTL 0xE0 #define NO_CTL 0xff -#define CTL_MODE_M 7 #define CTL_11A 0 #define CTL_11B 1 #define CTL_11G 2 diff --git a/drivers/net/wireless/libertas/if_sdio.c b/drivers/net/wireless/libertas/if_sdio.c index ba854c7..87b6349 100644 --- a/drivers/net/wireless/libertas/if_sdio.c +++ b/drivers/net/wireless/libertas/if_sdio.c @@ -128,7 +128,7 @@ struct if_sdio_card { bool helper_allocated; bool firmware_allocated; - u8 buffer[65536]; + u8 buffer[65536] __attribute__((aligned(4))); spinlock_t lock; struct if_sdio_packet *packets; diff --git a/drivers/net/wireless/p54/txrx.c b/drivers/net/wireless/p54/txrx.c index 173aec3..0e937dc 100644 --- a/drivers/net/wireless/p54/txrx.c +++ b/drivers/net/wireless/p54/txrx.c @@ -446,7 +446,7 @@ static void p54_rx_frame_sent(struct p54_common *priv, struct sk_buff *skb) } if (!(info->flags & IEEE80211_TX_CTL_NO_ACK) && - (!payload->status)) + !(payload->status & P54_TX_FAILED)) info->flags |= IEEE80211_TX_STAT_ACK; if (payload->status & P54_TX_PSM_CANCELLED) info->flags |= IEEE80211_TX_STAT_TX_FILTERED; diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index a9352b2..b7e755f 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -141,16 +141,6 @@ static struct notifier_block module_load_nb = { .notifier_call = module_load_notify, }; - -static void end_sync(void) -{ - end_cpu_work(); - /* make sure we don't leak task structs */ - process_task_mortuary(); - process_task_mortuary(); -} - - int sync_start(void) { int err; @@ -158,7 +148,7 @@ int sync_start(void) if (!zalloc_cpumask_var(&marked_cpus, GFP_KERNEL)) return -ENOMEM; - start_cpu_work(); + mutex_lock(&buffer_mutex); err = task_handoff_register(&task_free_nb); if (err) @@ -173,7 +163,10 @@ int sync_start(void) if (err) goto out4; + start_cpu_work(); + out: + mutex_unlock(&buffer_mutex); return err; out4: profile_event_unregister(PROFILE_MUNMAP, &munmap_nb); @@ -182,7 +175,6 @@ out3: out2: task_handoff_unregister(&task_free_nb); out1: - end_sync(); free_cpumask_var(marked_cpus); goto out; } @@ -190,11 +182,20 @@ out1: void sync_stop(void) { + /* flush buffers */ + mutex_lock(&buffer_mutex); + end_cpu_work(); unregister_module_notifier(&module_load_nb); profile_event_unregister(PROFILE_MUNMAP, &munmap_nb); profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb); task_handoff_unregister(&task_free_nb); - end_sync(); + mutex_unlock(&buffer_mutex); + flush_scheduled_work(); + + /* make sure we don't leak task structs */ + process_task_mortuary(); + process_task_mortuary(); + free_cpumask_var(marked_cpus); } diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index 219f79e..f179ac2 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -120,8 +120,6 @@ void end_cpu_work(void) cancel_delayed_work(&b->work); } - - flush_scheduled_work(); } /* diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c index 45fcc1e..3bc72d1 100644 --- a/drivers/pci/hotplug/acpi_pcihp.c +++ b/drivers/pci/hotplug/acpi_pcihp.c @@ -338,9 +338,7 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev, u32 flags) acpi_handle chandle, handle; struct acpi_buffer string = { ACPI_ALLOCATE_BUFFER, NULL }; - flags &= (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | - OSC_SHPC_NATIVE_HP_CONTROL | - 
OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); + flags &= OSC_SHPC_NATIVE_HP_CONTROL; if (!flags) { err("Invalid flags %u specified!\n", flags); return -EINVAL; @@ -360,7 +358,7 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev, u32 flags) acpi_get_name(handle, ACPI_FULL_PATHNAME, &string); dbg("Trying to get hotplug control for %s\n", (char *)string.pointer); - status = acpi_pci_osc_control_set(handle, flags); + status = acpi_pci_osc_control_set(handle, &flags, flags); if (ACPI_SUCCESS(status)) goto got_one; if (status == AE_SUPPORT) diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 4ed76b4..73d5139 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -176,19 +176,11 @@ static inline void pciehp_firmware_init(void) { pciehp_acpi_slot_detection_init(); } - -static inline int pciehp_get_hp_hw_control_from_firmware(struct pci_dev *dev) -{ - int retval; - u32 flags = (OSC_PCI_EXPRESS_NATIVE_HP_CONTROL | - OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); - retval = acpi_get_hp_hw_control_from_firmware(dev, flags); - if (retval) - return retval; - return pciehp_acpi_slot_detection_check(dev); -} #else #define pciehp_firmware_init() do {} while (0) -#define pciehp_get_hp_hw_control_from_firmware(dev) 0 +static inline int pciehp_acpi_slot_detection_check(struct pci_dev *dev) +{ + return 0; +} #endif /* CONFIG_ACPI */ #endif /* _PCIEHP_H */ diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c index 1f4000a..2574700 100644 --- a/drivers/pci/hotplug/pciehp_acpi.c +++ b/drivers/pci/hotplug/pciehp_acpi.c @@ -85,9 +85,7 @@ static int __init dummy_probe(struct pcie_device *dev) acpi_handle handle; struct dummy_slot *slot, *tmp; struct pci_dev *pdev = dev->port; - /* Note: pciehp_detect_mode != PCIEHP_DETECT_ACPI here */ - if (pciehp_get_hp_hw_control_from_firmware(pdev)) - return -ENODEV; + pos = pci_pcie_cap(pdev); if (!pos) return -ENODEV; diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index 3588ea6..aa5f3ff 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -59,7 +59,7 @@ module_param(pciehp_force, bool, 0644); MODULE_PARM_DESC(pciehp_debug, "Debugging mode enabled or not"); MODULE_PARM_DESC(pciehp_poll_mode, "Using polling mechanism for hot-plug events or not"); MODULE_PARM_DESC(pciehp_poll_time, "Polling mechanism frequency, in seconds"); -MODULE_PARM_DESC(pciehp_force, "Force pciehp, even if _OSC and OSHP are missing"); +MODULE_PARM_DESC(pciehp_force, "Force pciehp, even if OSHP is missing"); #define PCIE_MODULE_NAME "pciehp" @@ -235,7 +235,7 @@ static int pciehp_probe(struct pcie_device *dev) dev_info(&dev->device, "Bypassing BIOS check for pciehp use on %s\n", pci_name(dev->port)); - else if (pciehp_get_hp_hw_control_from_firmware(dev->port)) + else if (pciehp_acpi_slot_detection_check(dev->port)) goto err_out_none; ctrl = pcie_init(dev); diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 679c39d..7754a67 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -140,8 +140,10 @@ static inline void pci_msi_init_pci_dev(struct pci_dev *dev) { } #ifdef CONFIG_PCIEAER void pci_no_aer(void); +bool pci_aer_available(void); #else static inline void pci_no_aer(void) { } +static inline bool pci_aer_available(void) { return false; } #endif static inline int pci_no_d1d2(struct pci_dev *dev) diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index ea65454..00c62df 100644 --- a/drivers/pci/pcie/Makefile +++ 
b/drivers/pci/pcie/Makefile @@ -6,10 +6,11 @@ obj-$(CONFIG_PCIEASPM) += aspm.o pcieportdrv-y := portdrv_core.o portdrv_pci.o portdrv_bus.o +pcieportdrv-$(CONFIG_ACPI) += portdrv_acpi.o obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o # Build PCI Express AER if needed obj-$(CONFIG_PCIEAER) += aer/ -obj-$(CONFIG_PCIE_PME) += pme/ +obj-$(CONFIG_PCIE_PME) += pme.o diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 484cc55..f409948 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -72,6 +72,11 @@ void pci_no_aer(void) pcie_aer_disable = 1; /* has priority over 'forceload' */ } +bool pci_aer_available(void) +{ + return !pcie_aer_disable && pci_msi_enabled(); +} + static int set_device_error_reporting(struct pci_dev *dev, void *data) { bool enable = *((bool *)data); @@ -411,9 +416,7 @@ static void aer_error_resume(struct pci_dev *dev) */ static int __init aer_service_init(void) { - if (pcie_aer_disable) - return -ENXIO; - if (!pci_msi_enabled()) + if (!pci_aer_available()) return -ENXIO; return pcie_port_service_register(&aerdriver); } diff --git a/drivers/pci/pcie/aer/aerdrv_acpi.c b/drivers/pci/pcie/aer/aerdrv_acpi.c index f278d7b..2bb9b89 100644 --- a/drivers/pci/pcie/aer/aerdrv_acpi.c +++ b/drivers/pci/pcie/aer/aerdrv_acpi.c @@ -19,42 +19,6 @@ #include #include "aerdrv.h" -/** - * aer_osc_setup - run ACPI _OSC method - * @pciedev: pcie_device which AER is being enabled on - * - * @return: Zero on success. Nonzero otherwise. - * - * Invoked when PCIe bus loads AER service driver. To avoid conflict with - * BIOS AER support requires BIOS to yield AER control to OS native driver. - **/ -int aer_osc_setup(struct pcie_device *pciedev) -{ - acpi_status status = AE_NOT_FOUND; - struct pci_dev *pdev = pciedev->port; - acpi_handle handle = NULL; - - if (acpi_pci_disabled) - return -1; - - handle = acpi_find_root_bridge_handle(pdev); - if (handle) { - status = acpi_pci_osc_control_set(handle, - OSC_PCI_EXPRESS_AER_CONTROL | - OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); - } - - if (ACPI_FAILURE(status)) { - dev_printk(KERN_DEBUG, &pciedev->device, "AER service couldn't " - "init device: %s\n", - (status == AE_SUPPORT || status == AE_NOT_FOUND) ? - "no _OSC support" : "_OSC failed"); - return -1; - } - - return 0; -} - #ifdef CONFIG_ACPI_APEI static inline int hest_match_pci(struct acpi_hest_aer_common *p, struct pci_dev *pci) diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index fc0b5a9..29e268f 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -772,22 +772,10 @@ void aer_isr(struct work_struct *work) */ int aer_init(struct pcie_device *dev) { - if (pcie_aer_get_firmware_first(dev->port)) { - dev_printk(KERN_DEBUG, &dev->device, - "PCIe errors handled by platform firmware.\n"); - goto out; - } - - if (aer_osc_setup(dev)) - goto out; - - return 0; -out: if (forceload) { dev_printk(KERN_DEBUG, &dev->device, "aerdrv forceload requested.\n"); pcie_aer_force_firmware_first(dev->port, 0); - return 0; } - return -ENXIO; + return 0; } diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c new file mode 100644 index 0000000..2f3c904 --- /dev/null +++ b/drivers/pci/pcie/pme.c @@ -0,0 +1,462 @@ +/* + * PCIe Native PME support + * + * Copyright (C) 2007 - 2009 Intel Corp + * Copyright (C) 2007 - 2009 Shaohua Li + * Copyright (C) 2009 Rafael J. Wysocki , Novell Inc. + * + * This file is subject to the terms and conditions of the GNU General Public + * License V2. 
See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../pci.h" +#include "portdrv.h" + +#define PCI_EXP_RTSTA_PME 0x10000 /* PME status */ +#define PCI_EXP_RTSTA_PENDING 0x20000 /* PME pending */ + +/* + * If this switch is set, MSI will not be used for PCIe PME signaling. This + * causes the PCIe port driver to use INTx interrupts only, but it turns out + * that using MSI for PCIe PME signaling doesn't play well with PCIe PME-based + * wake-up from system sleep states. + */ +bool pcie_pme_msi_disabled; + +static int __init pcie_pme_setup(char *str) +{ + if (!strncmp(str, "nomsi", 5)) + pcie_pme_msi_disabled = true; + + return 1; +} +__setup("pcie_pme=", pcie_pme_setup); + +struct pcie_pme_service_data { + spinlock_t lock; + struct pcie_device *srv; + struct work_struct work; + bool noirq; /* Don't enable the PME interrupt used by this service. */ +}; + +/** + * pcie_pme_interrupt_enable - Enable/disable PCIe PME interrupt generation. + * @dev: PCIe root port or event collector. + * @enable: Enable or disable the interrupt. + */ +void pcie_pme_interrupt_enable(struct pci_dev *dev, bool enable) +{ + int rtctl_pos; + u16 rtctl; + + rtctl_pos = pci_pcie_cap(dev) + PCI_EXP_RTCTL; + + pci_read_config_word(dev, rtctl_pos, &rtctl); + if (enable) + rtctl |= PCI_EXP_RTCTL_PMEIE; + else + rtctl &= ~PCI_EXP_RTCTL_PMEIE; + pci_write_config_word(dev, rtctl_pos, rtctl); +} + +/** + * pcie_pme_clear_status - Clear root port PME interrupt status. + * @dev: PCIe root port or event collector. + */ +static void pcie_pme_clear_status(struct pci_dev *dev) +{ + int rtsta_pos; + u32 rtsta; + + rtsta_pos = pci_pcie_cap(dev) + PCI_EXP_RTSTA; + + pci_read_config_dword(dev, rtsta_pos, &rtsta); + rtsta |= PCI_EXP_RTSTA_PME; + pci_write_config_dword(dev, rtsta_pos, rtsta); +} + +/** + * pcie_pme_walk_bus - Scan a PCI bus for devices asserting PME#. + * @bus: PCI bus to scan. + * + * Scan given PCI bus and all buses under it for devices asserting PME#. + */ +static bool pcie_pme_walk_bus(struct pci_bus *bus) +{ + struct pci_dev *dev; + bool ret = false; + + list_for_each_entry(dev, &bus->devices, bus_list) { + /* Skip PCIe devices in case we started from a root port. */ + if (!pci_is_pcie(dev) && pci_check_pme_status(dev)) { + pm_request_resume(&dev->dev); + pci_wakeup_event(dev); + ret = true; + } + + if (dev->subordinate && pcie_pme_walk_bus(dev->subordinate)) + ret = true; + } + + return ret; +} + +/** + * pcie_pme_from_pci_bridge - Check if PCIe-PCI bridge generated a PME. + * @bus: Secondary bus of the bridge. + * @devfn: Device/function number to check. + * + * PME from PCI devices under a PCIe-PCI bridge may be converted to an in-band + * PCIe PME message. In such that case the bridge should use the Requester ID + * of device/function number 0 on its secondary bus. + */ +static bool pcie_pme_from_pci_bridge(struct pci_bus *bus, u8 devfn) +{ + struct pci_dev *dev; + bool found = false; + + if (devfn) + return false; + + dev = pci_dev_get(bus->self); + if (!dev) + return false; + + if (pci_is_pcie(dev) && dev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE) { + down_read(&pci_bus_sem); + if (pcie_pme_walk_bus(bus)) + found = true; + up_read(&pci_bus_sem); + } + + pci_dev_put(dev); + return found; +} + +/** + * pcie_pme_handle_request - Find device that generated PME and handle it. + * @port: Root port or event collector that generated the PME interrupt. 
+ * @req_id: PCIe Requester ID of the device that generated the PME. + */ +static void pcie_pme_handle_request(struct pci_dev *port, u16 req_id) +{ + u8 busnr = req_id >> 8, devfn = req_id & 0xff; + struct pci_bus *bus; + struct pci_dev *dev; + bool found = false; + + /* First, check if the PME is from the root port itself. */ + if (port->devfn == devfn && port->bus->number == busnr) { + if (pci_check_pme_status(port)) { + pm_request_resume(&port->dev); + found = true; + } else { + /* + * Apparently, the root port generated the PME on behalf + * of a non-PCIe device downstream. If this is done by + * a root port, the Requester ID field in its status + * register may contain either the root port's, or the + * source device's information (PCI Express Base + * Specification, Rev. 2.0, Section 6.1.9). + */ + down_read(&pci_bus_sem); + found = pcie_pme_walk_bus(port->subordinate); + up_read(&pci_bus_sem); + } + goto out; + } + + /* Second, find the bus the source device is on. */ + bus = pci_find_bus(pci_domain_nr(port->bus), busnr); + if (!bus) + goto out; + + /* Next, check if the PME is from a PCIe-PCI bridge. */ + found = pcie_pme_from_pci_bridge(bus, devfn); + if (found) + goto out; + + /* Finally, try to find the PME source on the bus. */ + down_read(&pci_bus_sem); + list_for_each_entry(dev, &bus->devices, bus_list) { + pci_dev_get(dev); + if (dev->devfn == devfn) { + found = true; + break; + } + pci_dev_put(dev); + } + up_read(&pci_bus_sem); + + if (found) { + /* The device is there, but we have to check its PME status. */ + found = pci_check_pme_status(dev); + if (found) { + pm_request_resume(&dev->dev); + pci_wakeup_event(dev); + } + pci_dev_put(dev); + } else if (devfn) { + /* + * The device is not there, but we can still try to recover by + * assuming that the PME was reported by a PCIe-PCI bridge that + * used devfn different from zero. + */ + dev_dbg(&port->dev, "PME interrupt generated for " + "non-existent device %02x:%02x.%d\n", + busnr, PCI_SLOT(devfn), PCI_FUNC(devfn)); + found = pcie_pme_from_pci_bridge(bus, 0); + } + + out: + if (!found) + dev_dbg(&port->dev, "Spurious native PME interrupt!\n"); +} + +/** + * pcie_pme_work_fn - Work handler for PCIe PME interrupt. + * @work: Work structure giving access to service data. + */ +static void pcie_pme_work_fn(struct work_struct *work) +{ + struct pcie_pme_service_data *data = + container_of(work, struct pcie_pme_service_data, work); + struct pci_dev *port = data->srv->port; + int rtsta_pos; + u32 rtsta; + + rtsta_pos = pci_pcie_cap(port) + PCI_EXP_RTSTA; + + spin_lock_irq(&data->lock); + + for (;;) { + if (data->noirq) + break; + + pci_read_config_dword(port, rtsta_pos, &rtsta); + if (rtsta & PCI_EXP_RTSTA_PME) { + /* + * Clear PME status of the port. If there are other + * pending PMEs, the status will be set again. + */ + pcie_pme_clear_status(port); + + spin_unlock_irq(&data->lock); + pcie_pme_handle_request(port, rtsta & 0xffff); + spin_lock_irq(&data->lock); + + continue; + } + + /* No need to loop if there are no more PMEs pending. */ + if (!(rtsta & PCI_EXP_RTSTA_PENDING)) + break; + + spin_unlock_irq(&data->lock); + cpu_relax(); + spin_lock_irq(&data->lock); + } + + if (!data->noirq) + pcie_pme_interrupt_enable(port, true); + + spin_unlock_irq(&data->lock); +} + +/** + * pcie_pme_irq - Interrupt handler for PCIe root port PME interrupt. + * @irq: Interrupt vector. + * @context: Interrupt context pointer. 
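The handler below keeps the hard-IRQ half cheap: it only tests the RTSTA PME bit, masks further PME interrupts, and defers the bus walk to a work item, which re-enables the interrupt once nothing is pending. A compilable skeleton of that mask, defer, and unmask shape (hardware accessors are stubbed; this is an analogue of the pattern, not the driver itself):

#include <stdbool.h>
#include <stdio.h>

static bool pme_status;          /* stands in for PCI_EXP_RTSTA_PME */
static bool irq_enabled = true;  /* stands in for PCI_EXP_RTCTL_PMEIE */

static void interrupt_enable(bool en) { irq_enabled = en; }
static void clear_status(void)        { pme_status = false; }
static void handle_request(void)      { printf("PME handled\n"); }

/* Hard-IRQ half: cheap check, mask, defer. */
static int irq_handler(void)
{
    if (!pme_status)
        return 0;              /* IRQ_NONE: not our interrupt */
    interrupt_enable(false);   /* no re-entry while the work runs */
    /* schedule_work(&data->work) in the real driver */
    return 1;                  /* IRQ_HANDLED */
}

/* Work half: drain all pending PMEs, then unmask. */
static void work_fn(void)
{
    while (pme_status) {
        clear_status();        /* may be set again by a new PME */
        handle_request();
    }
    interrupt_enable(true);
}

int main(void)
{
    pme_status = true;
    if (irq_handler())
        work_fn();
    return 0;
}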
+ */ +static irqreturn_t pcie_pme_irq(int irq, void *context) +{ + struct pci_dev *port; + struct pcie_pme_service_data *data; + int rtsta_pos; + u32 rtsta; + unsigned long flags; + + port = ((struct pcie_device *)context)->port; + data = get_service_data((struct pcie_device *)context); + + rtsta_pos = pci_pcie_cap(port) + PCI_EXP_RTSTA; + + spin_lock_irqsave(&data->lock, flags); + pci_read_config_dword(port, rtsta_pos, &rtsta); + + if (!(rtsta & PCI_EXP_RTSTA_PME)) { + spin_unlock_irqrestore(&data->lock, flags); + return IRQ_NONE; + } + + pcie_pme_interrupt_enable(port, false); + spin_unlock_irqrestore(&data->lock, flags); + + /* We don't use pm_wq, because it's freezable. */ + schedule_work(&data->work); + + return IRQ_HANDLED; +} + +/** + * pcie_pme_set_native - Set the PME interrupt flag for given device. + * @dev: PCI device to handle. + * @ign: Ignored. + */ +static int pcie_pme_set_native(struct pci_dev *dev, void *ign) +{ + dev_info(&dev->dev, "Signaling PME through PCIe PME interrupt\n"); + + device_set_run_wake(&dev->dev, true); + dev->pme_interrupt = true; + return 0; +} + +/** + * pcie_pme_mark_devices - Set the PME interrupt flag for devices below a port. + * @port: PCIe root port or event collector to handle. + * + * For each device below given root port, including the port itself (or for each + * root complex integrated endpoint if @port is a root complex event collector) + * set the flag indicating that it can signal run-time wake-up events via PCIe + * PME interrupts. + */ +static void pcie_pme_mark_devices(struct pci_dev *port) +{ + pcie_pme_set_native(port, NULL); + if (port->subordinate) { + pci_walk_bus(port->subordinate, pcie_pme_set_native, NULL); + } else { + struct pci_bus *bus = port->bus; + struct pci_dev *dev; + + /* Check if this is a root port event collector. */ + if (port->pcie_type != PCI_EXP_TYPE_RC_EC || !bus) + return; + + down_read(&pci_bus_sem); + list_for_each_entry(dev, &bus->devices, bus_list) + if (pci_is_pcie(dev) + && dev->pcie_type == PCI_EXP_TYPE_RC_END) + pcie_pme_set_native(dev, NULL); + up_read(&pci_bus_sem); + } +} + +/** + * pcie_pme_probe - Initialize PCIe PME service for given root port. + * @srv: PCIe service to initialize. + */ +static int pcie_pme_probe(struct pcie_device *srv) +{ + struct pci_dev *port; + struct pcie_pme_service_data *data; + int ret; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + spin_lock_init(&data->lock); + INIT_WORK(&data->work, pcie_pme_work_fn); + data->srv = srv; + set_service_data(srv, data); + + port = srv->port; + pcie_pme_interrupt_enable(port, false); + pcie_pme_clear_status(port); + + ret = request_irq(srv->irq, pcie_pme_irq, IRQF_SHARED, "PCIe PME", srv); + if (ret) { + kfree(data); + } else { + pcie_pme_mark_devices(port); + pcie_pme_interrupt_enable(port, true); + } + + return ret; +} + +/** + * pcie_pme_suspend - Suspend PCIe PME service device. + * @srv: PCIe service device to suspend. + */ +static int pcie_pme_suspend(struct pcie_device *srv) +{ + struct pcie_pme_service_data *data = get_service_data(srv); + struct pci_dev *port = srv->port; + + spin_lock_irq(&data->lock); + pcie_pme_interrupt_enable(port, false); + pcie_pme_clear_status(port); + data->noirq = true; + spin_unlock_irq(&data->lock); + + synchronize_irq(srv->irq); + + return 0; +} + +/** + * pcie_pme_resume - Resume PCIe PME service device. + * @srv - PCIe service device to resume. 
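pcie_pme_suspend() above quiesces in a strict order: mask the interrupt and clear status under the lock, set noirq so the work function stops touching the port, drop the lock, and only then synchronize_irq() to wait out any handler already in flight; pcie_pme_resume() below reverses it. A stubbed sketch of that ordering (illustrative stand-ins, not the driver's types):

#include <stdbool.h>

struct service_data {
    bool noirq;  /* checked by the work loop before touching hardware */
};

static void interrupt_enable(bool en) { (void)en; }
static void clear_status(void) { }
static void synchronize_irq_stub(void) { /* wait for in-flight handlers */ }

/* Mirror of the suspend/resume pairing: quiesce, flag, then wait. */
static void pme_suspend(struct service_data *d)
{
    /* under data->lock in the real driver */
    interrupt_enable(false);
    clear_status();
    d->noirq = true;
    /* unlock, then: */
    synchronize_irq_stub();  /* after this, no handler can be running */
}

static void pme_resume(struct service_data *d)
{
    /* under data->lock in the real driver */
    d->noirq = false;
    clear_status();
    interrupt_enable(true);
}

int main(void)
{
    struct service_data d = { .noirq = false };
    pme_suspend(&d);
    pme_resume(&d);
    return 0;
}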
+ */ +static int pcie_pme_resume(struct pcie_device *srv) +{ + struct pcie_pme_service_data *data = get_service_data(srv); + struct pci_dev *port = srv->port; + + spin_lock_irq(&data->lock); + data->noirq = false; + pcie_pme_clear_status(port); + pcie_pme_interrupt_enable(port, true); + spin_unlock_irq(&data->lock); + + return 0; +} + +/** + * pcie_pme_remove - Prepare PCIe PME service device for removal. + * @srv - PCIe service device to resume. + */ +static void pcie_pme_remove(struct pcie_device *srv) +{ + pcie_pme_suspend(srv); + free_irq(srv->irq, srv); + kfree(get_service_data(srv)); +} + +static struct pcie_port_service_driver pcie_pme_driver = { + .name = "pcie_pme", + .port_type = PCI_EXP_TYPE_ROOT_PORT, + .service = PCIE_PORT_SERVICE_PME, + + .probe = pcie_pme_probe, + .suspend = pcie_pme_suspend, + .resume = pcie_pme_resume, + .remove = pcie_pme_remove, +}; + +/** + * pcie_pme_service_init - Register the PCIe PME service driver. + */ +static int __init pcie_pme_service_init(void) +{ + return pcie_port_service_register(&pcie_pme_driver); +} + +module_init(pcie_pme_service_init); diff --git a/drivers/pci/pcie/pme/Makefile b/drivers/pci/pcie/pme/Makefile deleted file mode 100644 index 8b92380..0000000 --- a/drivers/pci/pcie/pme/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -# -# Makefile for PCI-Express Root Port PME signaling driver -# - -obj-$(CONFIG_PCIE_PME) += pmedriver.o - -pmedriver-objs := pcie_pme.o -pmedriver-$(CONFIG_ACPI) += pcie_pme_acpi.o diff --git a/drivers/pci/pcie/pme/pcie_pme.c b/drivers/pci/pcie/pme/pcie_pme.c deleted file mode 100644 index bbdea18..0000000 --- a/drivers/pci/pcie/pme/pcie_pme.c +++ /dev/null @@ -1,516 +0,0 @@ -/* - * PCIe Native PME support - * - * Copyright (C) 2007 - 2009 Intel Corp - * Copyright (C) 2007 - 2009 Shaohua Li - * Copyright (C) 2009 Rafael J. Wysocki , Novell Inc. - * - * This file is subject to the terms and conditions of the GNU General Public - * License V2. See the file "COPYING" in the main directory of this archive - * for more details. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../../pci.h" -#include "pcie_pme.h" - -#define PCI_EXP_RTSTA_PME 0x10000 /* PME status */ -#define PCI_EXP_RTSTA_PENDING 0x20000 /* PME pending */ - -/* - * If set, this switch will prevent the PCIe root port PME service driver from - * being registered. Consequently, the interrupt-based PCIe PME signaling will - * not be used by any PCIe root ports in that case. - */ -static bool pcie_pme_disabled = true; - -/* - * The PCI Express Base Specification 2.0, Section 6.1.8, states the following: - * "In order to maintain compatibility with non-PCI Express-aware system - * software, system power management logic must be configured by firmware to use - * the legacy mechanism of signaling PME by default. PCI Express-aware system - * software must notify the firmware prior to enabling native, interrupt-based - * PME signaling." However, if the platform doesn't provide us with a suitable - * notification mechanism or the notification fails, it is not clear whether or - * not we are supposed to use the interrupt-based PCIe PME signaling. The - * switch below can be used to indicate the desired behaviour. When set, it - * will make the kernel use the interrupt-based PCIe PME signaling regardless of - * the platform notification status, although the kernel will attempt to notify - * the platform anyway. 
When unset, it will prevent the kernel from using the - * the interrupt-based PCIe PME signaling if the platform notification fails, - * which is the default. - */ -static bool pcie_pme_force_enable; - -/* - * If this switch is set, MSI will not be used for PCIe PME signaling. This - * causes the PCIe port driver to use INTx interrupts only, but it turns out - * that using MSI for PCIe PME signaling doesn't play well with PCIe PME-based - * wake-up from system sleep states. - */ -bool pcie_pme_msi_disabled; - -static int __init pcie_pme_setup(char *str) -{ - if (!strncmp(str, "auto", 4)) - pcie_pme_disabled = false; - else if (!strncmp(str, "force", 5)) - pcie_pme_force_enable = true; - - str = strchr(str, ','); - if (str) { - str++; - str += strspn(str, " \t"); - if (*str && !strcmp(str, "nomsi")) - pcie_pme_msi_disabled = true; - } - - return 1; -} -__setup("pcie_pme=", pcie_pme_setup); - -/** - * pcie_pme_platform_setup - Ensure that the kernel controls the PCIe PME. - * @srv: PCIe PME root port service to use for carrying out the check. - * - * Notify the platform that the native PCIe PME is going to be used and return - * 'true' if the control of the PCIe PME registers has been acquired from the - * platform. - */ -static bool pcie_pme_platform_setup(struct pcie_device *srv) -{ - if (!pcie_pme_platform_notify(srv)) - return true; - return pcie_pme_force_enable; -} - -struct pcie_pme_service_data { - spinlock_t lock; - struct pcie_device *srv; - struct work_struct work; - bool noirq; /* Don't enable the PME interrupt used by this service. */ -}; - -/** - * pcie_pme_interrupt_enable - Enable/disable PCIe PME interrupt generation. - * @dev: PCIe root port or event collector. - * @enable: Enable or disable the interrupt. - */ -static void pcie_pme_interrupt_enable(struct pci_dev *dev, bool enable) -{ - int rtctl_pos; - u16 rtctl; - - rtctl_pos = pci_pcie_cap(dev) + PCI_EXP_RTCTL; - - pci_read_config_word(dev, rtctl_pos, &rtctl); - if (enable) - rtctl |= PCI_EXP_RTCTL_PMEIE; - else - rtctl &= ~PCI_EXP_RTCTL_PMEIE; - pci_write_config_word(dev, rtctl_pos, rtctl); -} - -/** - * pcie_pme_clear_status - Clear root port PME interrupt status. - * @dev: PCIe root port or event collector. - */ -static void pcie_pme_clear_status(struct pci_dev *dev) -{ - int rtsta_pos; - u32 rtsta; - - rtsta_pos = pci_pcie_cap(dev) + PCI_EXP_RTSTA; - - pci_read_config_dword(dev, rtsta_pos, &rtsta); - rtsta |= PCI_EXP_RTSTA_PME; - pci_write_config_dword(dev, rtsta_pos, rtsta); -} - -/** - * pcie_pme_walk_bus - Scan a PCI bus for devices asserting PME#. - * @bus: PCI bus to scan. - * - * Scan given PCI bus and all buses under it for devices asserting PME#. - */ -static bool pcie_pme_walk_bus(struct pci_bus *bus) -{ - struct pci_dev *dev; - bool ret = false; - - list_for_each_entry(dev, &bus->devices, bus_list) { - /* Skip PCIe devices in case we started from a root port. */ - if (!pci_is_pcie(dev) && pci_check_pme_status(dev)) { - pm_request_resume(&dev->dev); - pci_wakeup_event(dev); - ret = true; - } - - if (dev->subordinate && pcie_pme_walk_bus(dev->subordinate)) - ret = true; - } - - return ret; -} - -/** - * pcie_pme_from_pci_bridge - Check if PCIe-PCI bridge generated a PME. - * @bus: Secondary bus of the bridge. - * @devfn: Device/function number to check. - * - * PME from PCI devices under a PCIe-PCI bridge may be converted to an in-band - * PCIe PME message. In such that case the bridge should use the Requester ID - * of device/function number 0 on its secondary bus. 
- */ -static bool pcie_pme_from_pci_bridge(struct pci_bus *bus, u8 devfn) -{ - struct pci_dev *dev; - bool found = false; - - if (devfn) - return false; - - dev = pci_dev_get(bus->self); - if (!dev) - return false; - - if (pci_is_pcie(dev) && dev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE) { - down_read(&pci_bus_sem); - if (pcie_pme_walk_bus(bus)) - found = true; - up_read(&pci_bus_sem); - } - - pci_dev_put(dev); - return found; -} - -/** - * pcie_pme_handle_request - Find device that generated PME and handle it. - * @port: Root port or event collector that generated the PME interrupt. - * @req_id: PCIe Requester ID of the device that generated the PME. - */ -static void pcie_pme_handle_request(struct pci_dev *port, u16 req_id) -{ - u8 busnr = req_id >> 8, devfn = req_id & 0xff; - struct pci_bus *bus; - struct pci_dev *dev; - bool found = false; - - /* First, check if the PME is from the root port itself. */ - if (port->devfn == devfn && port->bus->number == busnr) { - if (pci_check_pme_status(port)) { - pm_request_resume(&port->dev); - found = true; - } else { - /* - * Apparently, the root port generated the PME on behalf - * of a non-PCIe device downstream. If this is done by - * a root port, the Requester ID field in its status - * register may contain either the root port's, or the - * source device's information (PCI Express Base - * Specification, Rev. 2.0, Section 6.1.9). - */ - down_read(&pci_bus_sem); - found = pcie_pme_walk_bus(port->subordinate); - up_read(&pci_bus_sem); - } - goto out; - } - - /* Second, find the bus the source device is on. */ - bus = pci_find_bus(pci_domain_nr(port->bus), busnr); - if (!bus) - goto out; - - /* Next, check if the PME is from a PCIe-PCI bridge. */ - found = pcie_pme_from_pci_bridge(bus, devfn); - if (found) - goto out; - - /* Finally, try to find the PME source on the bus. */ - down_read(&pci_bus_sem); - list_for_each_entry(dev, &bus->devices, bus_list) { - pci_dev_get(dev); - if (dev->devfn == devfn) { - found = true; - break; - } - pci_dev_put(dev); - } - up_read(&pci_bus_sem); - - if (found) { - /* The device is there, but we have to check its PME status. */ - found = pci_check_pme_status(dev); - if (found) { - pm_request_resume(&dev->dev); - pci_wakeup_event(dev); - } - pci_dev_put(dev); - } else if (devfn) { - /* - * The device is not there, but we can still try to recover by - * assuming that the PME was reported by a PCIe-PCI bridge that - * used devfn different from zero. - */ - dev_dbg(&port->dev, "PME interrupt generated for " - "non-existent device %02x:%02x.%d\n", - busnr, PCI_SLOT(devfn), PCI_FUNC(devfn)); - found = pcie_pme_from_pci_bridge(bus, 0); - } - - out: - if (!found) - dev_dbg(&port->dev, "Spurious native PME interrupt!\n"); -} - -/** - * pcie_pme_work_fn - Work handler for PCIe PME interrupt. - * @work: Work structure giving access to service data. - */ -static void pcie_pme_work_fn(struct work_struct *work) -{ - struct pcie_pme_service_data *data = - container_of(work, struct pcie_pme_service_data, work); - struct pci_dev *port = data->srv->port; - int rtsta_pos; - u32 rtsta; - - rtsta_pos = pci_pcie_cap(port) + PCI_EXP_RTSTA; - - spin_lock_irq(&data->lock); - - for (;;) { - if (data->noirq) - break; - - pci_read_config_dword(port, rtsta_pos, &rtsta); - if (rtsta & PCI_EXP_RTSTA_PME) { - /* - * Clear PME status of the port. If there are other - * pending PMEs, the status will be set again. 
- */ - pcie_pme_clear_status(port); - - spin_unlock_irq(&data->lock); - pcie_pme_handle_request(port, rtsta & 0xffff); - spin_lock_irq(&data->lock); - - continue; - } - - /* No need to loop if there are no more PMEs pending. */ - if (!(rtsta & PCI_EXP_RTSTA_PENDING)) - break; - - spin_unlock_irq(&data->lock); - cpu_relax(); - spin_lock_irq(&data->lock); - } - - if (!data->noirq) - pcie_pme_interrupt_enable(port, true); - - spin_unlock_irq(&data->lock); -} - -/** - * pcie_pme_irq - Interrupt handler for PCIe root port PME interrupt. - * @irq: Interrupt vector. - * @context: Interrupt context pointer. - */ -static irqreturn_t pcie_pme_irq(int irq, void *context) -{ - struct pci_dev *port; - struct pcie_pme_service_data *data; - int rtsta_pos; - u32 rtsta; - unsigned long flags; - - port = ((struct pcie_device *)context)->port; - data = get_service_data((struct pcie_device *)context); - - rtsta_pos = pci_pcie_cap(port) + PCI_EXP_RTSTA; - - spin_lock_irqsave(&data->lock, flags); - pci_read_config_dword(port, rtsta_pos, &rtsta); - - if (!(rtsta & PCI_EXP_RTSTA_PME)) { - spin_unlock_irqrestore(&data->lock, flags); - return IRQ_NONE; - } - - pcie_pme_interrupt_enable(port, false); - spin_unlock_irqrestore(&data->lock, flags); - - /* We don't use pm_wq, because it's freezable. */ - schedule_work(&data->work); - - return IRQ_HANDLED; -} - -/** - * pcie_pme_set_native - Set the PME interrupt flag for given device. - * @dev: PCI device to handle. - * @ign: Ignored. - */ -static int pcie_pme_set_native(struct pci_dev *dev, void *ign) -{ - dev_info(&dev->dev, "Signaling PME through PCIe PME interrupt\n"); - - device_set_run_wake(&dev->dev, true); - dev->pme_interrupt = true; - return 0; -} - -/** - * pcie_pme_mark_devices - Set the PME interrupt flag for devices below a port. - * @port: PCIe root port or event collector to handle. - * - * For each device below given root port, including the port itself (or for each - * root complex integrated endpoint if @port is a root complex event collector) - * set the flag indicating that it can signal run-time wake-up events via PCIe - * PME interrupts. - */ -static void pcie_pme_mark_devices(struct pci_dev *port) -{ - pcie_pme_set_native(port, NULL); - if (port->subordinate) { - pci_walk_bus(port->subordinate, pcie_pme_set_native, NULL); - } else { - struct pci_bus *bus = port->bus; - struct pci_dev *dev; - - /* Check if this is a root port event collector. */ - if (port->pcie_type != PCI_EXP_TYPE_RC_EC || !bus) - return; - - down_read(&pci_bus_sem); - list_for_each_entry(dev, &bus->devices, bus_list) - if (pci_is_pcie(dev) - && dev->pcie_type == PCI_EXP_TYPE_RC_END) - pcie_pme_set_native(dev, NULL); - up_read(&pci_bus_sem); - } -} - -/** - * pcie_pme_probe - Initialize PCIe PME service for given root port. - * @srv: PCIe service to initialize. 
- */ -static int pcie_pme_probe(struct pcie_device *srv) -{ - struct pci_dev *port; - struct pcie_pme_service_data *data; - int ret; - - if (!pcie_pme_platform_setup(srv)) - return -EACCES; - - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - return -ENOMEM; - - spin_lock_init(&data->lock); - INIT_WORK(&data->work, pcie_pme_work_fn); - data->srv = srv; - set_service_data(srv, data); - - port = srv->port; - pcie_pme_interrupt_enable(port, false); - pcie_pme_clear_status(port); - - ret = request_irq(srv->irq, pcie_pme_irq, IRQF_SHARED, "PCIe PME", srv); - if (ret) { - kfree(data); - } else { - pcie_pme_mark_devices(port); - pcie_pme_interrupt_enable(port, true); - } - - return ret; -} - -/** - * pcie_pme_suspend - Suspend PCIe PME service device. - * @srv: PCIe service device to suspend. - */ -static int pcie_pme_suspend(struct pcie_device *srv) -{ - struct pcie_pme_service_data *data = get_service_data(srv); - struct pci_dev *port = srv->port; - - spin_lock_irq(&data->lock); - pcie_pme_interrupt_enable(port, false); - pcie_pme_clear_status(port); - data->noirq = true; - spin_unlock_irq(&data->lock); - - synchronize_irq(srv->irq); - - return 0; -} - -/** - * pcie_pme_resume - Resume PCIe PME service device. - * @srv - PCIe service device to resume. - */ -static int pcie_pme_resume(struct pcie_device *srv) -{ - struct pcie_pme_service_data *data = get_service_data(srv); - struct pci_dev *port = srv->port; - - spin_lock_irq(&data->lock); - data->noirq = false; - pcie_pme_clear_status(port); - pcie_pme_interrupt_enable(port, true); - spin_unlock_irq(&data->lock); - - return 0; -} - -/** - * pcie_pme_remove - Prepare PCIe PME service device for removal. - * @srv - PCIe service device to resume. - */ -static void pcie_pme_remove(struct pcie_device *srv) -{ - pcie_pme_suspend(srv); - free_irq(srv->irq, srv); - kfree(get_service_data(srv)); -} - -static struct pcie_port_service_driver pcie_pme_driver = { - .name = "pcie_pme", - .port_type = PCI_EXP_TYPE_ROOT_PORT, - .service = PCIE_PORT_SERVICE_PME, - - .probe = pcie_pme_probe, - .suspend = pcie_pme_suspend, - .resume = pcie_pme_resume, - .remove = pcie_pme_remove, -}; - -/** - * pcie_pme_service_init - Register the PCIe PME service driver. - */ -static int __init pcie_pme_service_init(void) -{ - return pcie_pme_disabled ? - -ENODEV : pcie_port_service_register(&pcie_pme_driver); -} - -module_init(pcie_pme_service_init); diff --git a/drivers/pci/pcie/pme/pcie_pme.h b/drivers/pci/pcie/pme/pcie_pme.h deleted file mode 100644 index b30d2b7..0000000 --- a/drivers/pci/pcie/pme/pcie_pme.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * drivers/pci/pcie/pme/pcie_pme.h - * - * PCI Express Root Port PME signaling support - * - * Copyright (C) 2009 Rafael J. Wysocki , Novell Inc. - */ - -#ifndef _PCIE_PME_H_ -#define _PCIE_PME_H_ - -struct pcie_device; - -#ifdef CONFIG_ACPI -extern int pcie_pme_acpi_setup(struct pcie_device *srv); - -static inline int pcie_pme_platform_notify(struct pcie_device *srv) -{ - return pcie_pme_acpi_setup(srv); -} -#else /* !CONFIG_ACPI */ -static inline int pcie_pme_platform_notify(struct pcie_device *srv) -{ - return 0; -} -#endif /* !CONFIG_ACPI */ - -#endif diff --git a/drivers/pci/pcie/pme/pcie_pme_acpi.c b/drivers/pci/pcie/pme/pcie_pme_acpi.c deleted file mode 100644 index 83ab228..0000000 --- a/drivers/pci/pcie/pme/pcie_pme_acpi.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * PCIe Native PME support, ACPI-related part - * - * Copyright (C) 2009 Rafael J. Wysocki , Novell Inc. 
- * - * This file is subject to the terms and conditions of the GNU General Public - * License V2. See the file "COPYING" in the main directory of this archive - * for more details. - */ - -#include -#include -#include -#include -#include -#include - -/** - * pcie_pme_acpi_setup - Request the ACPI BIOS to release control over PCIe PME. - * @srv - PCIe PME service for a root port or event collector. - * - * Invoked when the PCIe bus type loads PCIe PME service driver. To avoid - * conflict with the BIOS PCIe support requires the BIOS to yield PCIe PME - * control to the kernel. - */ -int pcie_pme_acpi_setup(struct pcie_device *srv) -{ - acpi_status status = AE_NOT_FOUND; - struct pci_dev *port = srv->port; - acpi_handle handle; - int error = 0; - - if (acpi_pci_disabled) - return -ENOSYS; - - dev_info(&port->dev, "Requesting control of PCIe PME from ACPI BIOS\n"); - - handle = acpi_find_root_bridge_handle(port); - if (!handle) - return -EINVAL; - - status = acpi_pci_osc_control_set(handle, - OSC_PCI_EXPRESS_PME_CONTROL | - OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); - if (ACPI_FAILURE(status)) { - dev_info(&port->dev, - "Failed to receive control of PCIe PME service: %s\n", - (status == AE_SUPPORT || status == AE_NOT_FOUND) ? - "no _OSC support" : "ACPI _OSC failed"); - error = -ENODEV; - } - - return error; -} diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 813a5c3..7b5aba0 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -20,6 +20,9 @@ #define get_descriptor_id(type, service) (((type - 4) << 4) | service) +extern bool pcie_ports_disabled; +extern bool pcie_ports_auto; + extern struct bus_type pcie_port_bus_type; extern int pcie_port_device_register(struct pci_dev *dev); #ifdef CONFIG_PM @@ -30,6 +33,8 @@ extern void pcie_port_device_remove(struct pci_dev *dev); extern int __must_check pcie_port_bus_register(void); extern void pcie_port_bus_unregister(void); +struct pci_dev; + #ifdef CONFIG_PCIE_PME extern bool pcie_pme_msi_disabled; @@ -42,9 +47,26 @@ static inline bool pcie_pme_no_msi(void) { return pcie_pme_msi_disabled; } + +extern void pcie_pme_interrupt_enable(struct pci_dev *dev, bool enable); #else /* !CONFIG_PCIE_PME */ static inline void pcie_pme_disable_msi(void) {} static inline bool pcie_pme_no_msi(void) { return false; } +static inline void pcie_pme_interrupt_enable(struct pci_dev *dev, bool en) {} #endif /* !CONFIG_PCIE_PME */ +#ifdef CONFIG_ACPI +extern int pcie_port_acpi_setup(struct pci_dev *port, int *mask); + +static inline int pcie_port_platform_notify(struct pci_dev *port, int *mask) +{ + return pcie_port_acpi_setup(port, mask); +} +#else /* !CONFIG_ACPI */ +static inline int pcie_port_platform_notify(struct pci_dev *port, int *mask) +{ + return 0; +} +#endif /* !CONFIG_ACPI */ + #endif /* _PORTDRV_H_ */ diff --git a/drivers/pci/pcie/portdrv_acpi.c b/drivers/pci/pcie/portdrv_acpi.c new file mode 100644 index 0000000..b7c4cb1 --- /dev/null +++ b/drivers/pci/pcie/portdrv_acpi.c @@ -0,0 +1,77 @@ +/* + * PCIe Port Native Services Support, ACPI-Related Part + * + * Copyright (C) 2010 Rafael J. Wysocki , Novell Inc. + * + * This file is subject to the terms and conditions of the GNU General Public + * License V2. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#include +#include +#include +#include +#include +#include + +#include "aer/aerdrv.h" +#include "../pci.h" + +/** + * pcie_port_acpi_setup - Request the BIOS to release control of PCIe services. 
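The function below asks _OSC for hot-plug, PME, and AER control in a single request and then converts whatever the BIOS actually granted into a PCIE_PORT_SERVICE_* mask, with VC always allowed since it needs no firmware handshake. The mapping reduces to a few lines of plain C (a sketch; the flag values are illustrative, not the kernel's):

#include <assert.h>

/* Illustrative stand-ins for the OSC_* and PCIE_PORT_SERVICE_* flags. */
#define OSC_HP_CONTROL   0x01
#define OSC_PME_CONTROL  0x02
#define OSC_AER_CONTROL  0x04

#define SERVICE_VC  0x01
#define SERVICE_HP  0x02
#define SERVICE_PME 0x04
#define SERVICE_AER 0x08

static int granted_to_services(unsigned int flags)
{
    int mask = SERVICE_VC;  /* VC needs no BIOS handshake */

    if (flags & OSC_HP_CONTROL)
        mask |= SERVICE_HP;
    if (flags & OSC_PME_CONTROL)
        mask |= SERVICE_PME;
    if (flags & OSC_AER_CONTROL)
        mask |= SERVICE_AER;
    return mask;
}

int main(void)
{
    /* BIOS yielded hot-plug and PME, but kept AER for itself. */
    assert(granted_to_services(OSC_HP_CONTROL | OSC_PME_CONTROL)
           == (SERVICE_VC | SERVICE_HP | SERVICE_PME));
    return 0;
}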
+ * @port: PCIe Port service for a root port or event collector. + * @srv_mask: Bit mask of services that can be enabled for @port. + * + * Invoked when @port is identified as a PCIe port device. To avoid conflicts + * with the BIOS PCIe port native services support requires the BIOS to yield + * control of these services to the kernel. The mask of services that the BIOS + * allows to be enabled for @port is written to @srv_mask. + * + * NOTE: It turns out that we cannot do that for individual port services + * separately, because that would make some systems work incorrectly. + */ +int pcie_port_acpi_setup(struct pci_dev *port, int *srv_mask) +{ + acpi_status status; + acpi_handle handle; + u32 flags; + + if (acpi_pci_disabled) + return 0; + + handle = acpi_find_root_bridge_handle(port); + if (!handle) + return -EINVAL; + + flags = OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL + | OSC_PCI_EXPRESS_NATIVE_HP_CONTROL + | OSC_PCI_EXPRESS_PME_CONTROL; + + if (pci_aer_available()) { + if (pcie_aer_get_firmware_first(port)) + dev_dbg(&port->dev, "PCIe errors handled by BIOS.\n"); + else + flags |= OSC_PCI_EXPRESS_AER_CONTROL; + } + + status = acpi_pci_osc_control_set(handle, &flags, + OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); + if (ACPI_FAILURE(status)) { + dev_dbg(&port->dev, "ACPI _OSC request failed (code %d)\n", + status); + return -ENODEV; + } + + dev_info(&port->dev, "ACPI _OSC control granted for 0x%02x\n", flags); + + *srv_mask = PCIE_PORT_SERVICE_VC; + if (flags & OSC_PCI_EXPRESS_NATIVE_HP_CONTROL) + *srv_mask |= PCIE_PORT_SERVICE_HP; + if (flags & OSC_PCI_EXPRESS_PME_CONTROL) + *srv_mask |= PCIE_PORT_SERVICE_PME; + if (flags & OSC_PCI_EXPRESS_AER_CONTROL) + *srv_mask |= PCIE_PORT_SERVICE_AER; + + return 0; +} diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index e73effb..a9c222d 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include "../pci.h" #include "portdrv.h" @@ -236,24 +238,64 @@ static int get_port_device_capability(struct pci_dev *dev) int services = 0, pos; u16 reg16; u32 reg32; + int cap_mask; + int err; + + err = pcie_port_platform_notify(dev, &cap_mask); + if (pcie_ports_auto) { + if (err) { + pcie_no_aspm(); + return 0; + } + } else { + cap_mask = PCIE_PORT_SERVICE_PME | PCIE_PORT_SERVICE_HP + | PCIE_PORT_SERVICE_VC; + if (pci_aer_available()) + cap_mask |= PCIE_PORT_SERVICE_AER; + } pos = pci_pcie_cap(dev); pci_read_config_word(dev, pos + PCI_EXP_FLAGS, ®16); /* Hot-Plug Capable */ - if (reg16 & PCI_EXP_FLAGS_SLOT) { + if ((cap_mask & PCIE_PORT_SERVICE_HP) && (reg16 & PCI_EXP_FLAGS_SLOT)) { pci_read_config_dword(dev, pos + PCI_EXP_SLTCAP, ®32); - if (reg32 & PCI_EXP_SLTCAP_HPC) + if (reg32 & PCI_EXP_SLTCAP_HPC) { services |= PCIE_PORT_SERVICE_HP; + /* + * Disable hot-plug interrupts in case they have been + * enabled by the BIOS and the hot-plug service driver + * is not loaded. + */ + pos += PCI_EXP_SLTCTL; + pci_read_config_word(dev, pos, ®16); + reg16 &= ~(PCI_EXP_SLTCTL_CCIE | PCI_EXP_SLTCTL_HPIE); + pci_write_config_word(dev, pos, reg16); + } } /* AER capable */ - if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR)) + if ((cap_mask & PCIE_PORT_SERVICE_AER) + && pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR)) { services |= PCIE_PORT_SERVICE_AER; + /* + * Disable AER on this port in case it's been enabled by the + * BIOS (the AER service driver will enable it when necessary). 
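get_port_device_capability() above first settles cap_mask: with pcie_ports=auto it trusts the platform answer and reports no services at all when the _OSC handshake fails, while pcie_ports=native skips the BIOS entirely and takes every service the kernel can drive, AER included only when it is available. A compact model of that decision (a sketch with illustrative flag values, reusing the SERVICE_* names from the previous example):

#include <assert.h>
#include <stdbool.h>

#define SERVICE_VC  0x01
#define SERVICE_HP  0x02
#define SERVICE_PME 0x04
#define SERVICE_AER 0x08

/* Illustrative: which services a port may enable, given whether _OSC
 * negotiation is trusted (auto mode) and what the platform granted. */
static int port_cap_mask(bool ports_auto, bool osc_ok, int platform_mask,
                         bool aer_available)
{
    int mask;

    if (ports_auto)
        return osc_ok ? platform_mask : 0;  /* no grant, no services */

    /* pcie_ports=native: ignore the BIOS, take everything we can. */
    mask = SERVICE_PME | SERVICE_HP | SERVICE_VC;
    if (aer_available)
        mask |= SERVICE_AER;
    return mask;
}

int main(void)
{
    assert(port_cap_mask(true, false, 0, true) == 0);
    assert(port_cap_mask(false, false, 0, false)
           == (SERVICE_PME | SERVICE_HP | SERVICE_VC));
    return 0;
}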
+ */ + pci_disable_pcie_error_reporting(dev); + } /* VC support */ if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_VC)) services |= PCIE_PORT_SERVICE_VC; /* Root ports are capable of generating PME too */ - if (dev->pcie_type == PCI_EXP_TYPE_ROOT_PORT) + if ((cap_mask & PCIE_PORT_SERVICE_PME) + && dev->pcie_type == PCI_EXP_TYPE_ROOT_PORT) { services |= PCIE_PORT_SERVICE_PME; + /* + * Disable PME interrupt on this port in case it's been enabled + * by the BIOS (the PME service driver will enable it when + * necessary). + */ + pcie_pme_interrupt_enable(dev, false); + } return services; } @@ -494,6 +536,9 @@ static void pcie_port_shutdown_service(struct device *dev) {} */ int pcie_port_service_register(struct pcie_port_service_driver *new) { + if (pcie_ports_disabled) + return -ENODEV; + new->driver.name = (char *)new->name; new->driver.bus = &pcie_port_bus_type; new->driver.probe = pcie_port_probe_service; diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 3debed2..f9033e1 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "portdrv.h" #include "aer/aerdrv.h" @@ -29,6 +30,31 @@ MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); +/* If this switch is set, PCIe port native services should not be enabled. */ +bool pcie_ports_disabled; + +/* + * If this switch is set, ACPI _OSC will be used to determine whether or not to + * enable PCIe port native services. + */ +bool pcie_ports_auto = true; + +static int __init pcie_port_setup(char *str) +{ + if (!strncmp(str, "compat", 6)) { + pcie_ports_disabled = true; + } else if (!strncmp(str, "native", 6)) { + pcie_ports_disabled = false; + pcie_ports_auto = false; + } else if (!strncmp(str, "auto", 4)) { + pcie_ports_disabled = false; + pcie_ports_auto = true; + } + + return 1; +} +__setup("pcie_ports=", pcie_port_setup); + /* global data */ static int pcie_portdrv_restore_config(struct pci_dev *dev) @@ -301,6 +327,11 @@ static int __init pcie_portdrv_init(void) { int retval; + if (pcie_ports_disabled) { + pcie_no_aspm(); + return -EACCES; + } + dmi_check_system(pcie_portdrv_dmi_table); retval = pcie_port_bus_register(); @@ -315,11 +346,4 @@ static int __init pcie_portdrv_init(void) return retval; } -static void __exit pcie_portdrv_exit(void) -{ - pci_unregister_driver(&pcie_portdriver); - pcie_port_bus_unregister(); -} - module_init(pcie_portdrv_init); -module_exit(pcie_portdrv_exit); diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index 659eaa0..968cfea 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -49,7 +49,7 @@ static ssize_t address_read_file(struct pci_slot *slot, char *buf) } /* these strings match up with the values in pci_bus_speed */ -static char *pci_bus_speed_strings[] = { +static const char *pci_bus_speed_strings[] = { "33 MHz PCI", /* 0x00 */ "66 MHz PCI", /* 0x01 */ "66 MHz PCI-X", /* 0x02 */ diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index 54aa1c2..a5c1765 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -163,7 +163,7 @@ static int pcmcia_access_config(struct pcmcia_device *p_dev, c = p_dev->function_config; if (!(c->state & CONFIG_LOCKED)) { - dev_dbg(&s->dev, "Configuration isnt't locked\n"); + dev_dbg(&p_dev->dev, "Configuration isnt't locked\n"); mutex_unlock(&s->ops_mutex); return -EACCES; } @@ -220,7 +220,7 @@ int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t 
wh, s->win[w].card_start = offset; ret = s->ops->set_mem_map(s, &s->win[w]); if (ret) - dev_warn(&s->dev, "failed to set_mem_map\n"); + dev_warn(&p_dev->dev, "failed to set_mem_map\n"); mutex_unlock(&s->ops_mutex); return ret; } /* pcmcia_map_mem_page */ @@ -244,18 +244,18 @@ int pcmcia_modify_configuration(struct pcmcia_device *p_dev, c = p_dev->function_config; if (!(s->state & SOCKET_PRESENT)) { - dev_dbg(&s->dev, "No card present\n"); + dev_dbg(&p_dev->dev, "No card present\n"); ret = -ENODEV; goto unlock; } if (!(c->state & CONFIG_LOCKED)) { - dev_dbg(&s->dev, "Configuration isnt't locked\n"); + dev_dbg(&p_dev->dev, "Configuration isn't locked\n"); ret = -EACCES; goto unlock; } if (mod->Attributes & (CONF_IRQ_CHANGE_VALID | CONF_VCC_CHANGE_VALID)) { - dev_dbg(&s->dev, + dev_dbg(&p_dev->dev, "changing Vcc or IRQ is not allowed at this time\n"); ret = -EINVAL; goto unlock; @@ -265,20 +265,22 @@ int pcmcia_modify_configuration(struct pcmcia_device *p_dev, if ((mod->Attributes & CONF_VPP1_CHANGE_VALID) && (mod->Attributes & CONF_VPP2_CHANGE_VALID)) { if (mod->Vpp1 != mod->Vpp2) { - dev_dbg(&s->dev, "Vpp1 and Vpp2 must be the same\n"); + dev_dbg(&p_dev->dev, + "Vpp1 and Vpp2 must be the same\n"); ret = -EINVAL; goto unlock; } s->socket.Vpp = mod->Vpp1; if (s->ops->set_socket(s, &s->socket)) { - dev_printk(KERN_WARNING, &s->dev, + dev_printk(KERN_WARNING, &p_dev->dev, "Unable to set VPP\n"); ret = -EIO; goto unlock; } } else if ((mod->Attributes & CONF_VPP1_CHANGE_VALID) || (mod->Attributes & CONF_VPP2_CHANGE_VALID)) { - dev_dbg(&s->dev, "changing Vcc is not allowed at this time\n"); + dev_dbg(&p_dev->dev, + "changing Vcc is not allowed at this time\n"); ret = -EINVAL; goto unlock; } @@ -401,7 +403,7 @@ int pcmcia_release_window(struct pcmcia_device *p_dev, struct resource *res) win = &s->win[w]; if (!(p_dev->_win & CLIENT_WIN_REQ(w))) { - dev_dbg(&s->dev, "not releasing unknown window\n"); + dev_dbg(&p_dev->dev, "not releasing unknown window\n"); mutex_unlock(&s->ops_mutex); return -EINVAL; } @@ -439,7 +441,7 @@ int pcmcia_request_configuration(struct pcmcia_device *p_dev, return -ENODEV; if (req->IntType & INT_CARDBUS) { - dev_dbg(&s->dev, "IntType may not be INT_CARDBUS\n"); + dev_dbg(&p_dev->dev, "IntType may not be INT_CARDBUS\n"); return -EINVAL; } @@ -447,7 +449,7 @@ int pcmcia_request_configuration(struct pcmcia_device *p_dev, c = p_dev->function_config; if (c->state & CONFIG_LOCKED) { mutex_unlock(&s->ops_mutex); - dev_dbg(&s->dev, "Configuration is locked\n"); + dev_dbg(&p_dev->dev, "Configuration is locked\n"); return -EACCES; } @@ -455,7 +457,7 @@ int pcmcia_request_configuration(struct pcmcia_device *p_dev, s->socket.Vpp = req->Vpp; if (s->ops->set_socket(s, &s->socket)) { mutex_unlock(&s->ops_mutex); - dev_printk(KERN_WARNING, &s->dev, + dev_printk(KERN_WARNING, &p_dev->dev, "Unable to set socket state\n"); return -EINVAL; } @@ -569,19 +571,20 @@ int pcmcia_request_io(struct pcmcia_device *p_dev) int ret = -EINVAL; mutex_lock(&s->ops_mutex); - dev_dbg(&s->dev, "pcmcia_request_io: %pR , %pR", &c->io[0], &c->io[1]); + dev_dbg(&p_dev->dev, "pcmcia_request_io: %pR , %pR", + &c->io[0], &c->io[1]); if (!(s->state & SOCKET_PRESENT)) { - dev_dbg(&s->dev, "pcmcia_request_io: No card present\n"); + dev_dbg(&p_dev->dev, "pcmcia_request_io: No card present\n"); goto out; } if (c->state & CONFIG_LOCKED) { - dev_dbg(&s->dev, "Configuration is locked\n"); + dev_dbg(&p_dev->dev, "Configuration is locked\n"); goto out; } if (c->state & CONFIG_IO_REQ) { - dev_dbg(&s->dev, "IO already 
configured\n"); + dev_dbg(&p_dev->dev, "IO already configured\n"); goto out; } @@ -601,7 +604,7 @@ int pcmcia_request_io(struct pcmcia_device *p_dev) c->state |= CONFIG_IO_REQ; p_dev->_io = 1; - dev_dbg(&s->dev, "pcmcia_request_io succeeded: %pR , %pR", + dev_dbg(&p_dev->dev, "pcmcia_request_io succeeded: %pR , %pR", &c->io[0], &c->io[1]); out: mutex_unlock(&s->ops_mutex); @@ -800,7 +803,7 @@ int pcmcia_request_window(struct pcmcia_device *p_dev, win_req_t *req, window_ha int w; if (!(s->state & SOCKET_PRESENT)) { - dev_dbg(&s->dev, "No card present\n"); + dev_dbg(&p_dev->dev, "No card present\n"); return -ENODEV; } @@ -809,12 +812,12 @@ int pcmcia_request_window(struct pcmcia_device *p_dev, win_req_t *req, window_ha req->Size = s->map_size; align = (s->features & SS_CAP_MEM_ALIGN) ? req->Size : s->map_size; if (req->Size & (s->map_size-1)) { - dev_dbg(&s->dev, "invalid map size\n"); + dev_dbg(&p_dev->dev, "invalid map size\n"); return -EINVAL; } if ((req->Base && (s->features & SS_CAP_STATIC_MAP)) || (req->Base & (align-1))) { - dev_dbg(&s->dev, "invalid base address\n"); + dev_dbg(&p_dev->dev, "invalid base address\n"); return -EINVAL; } if (req->Base) @@ -826,7 +829,7 @@ int pcmcia_request_window(struct pcmcia_device *p_dev, win_req_t *req, window_ha if (!(s->state & SOCKET_WIN_REQ(w))) break; if (w == MAX_WIN) { - dev_dbg(&s->dev, "all windows are used already\n"); + dev_dbg(&p_dev->dev, "all windows are used already\n"); mutex_unlock(&s->ops_mutex); return -EINVAL; } @@ -837,7 +840,7 @@ int pcmcia_request_window(struct pcmcia_device *p_dev, win_req_t *req, window_ha win->res = pcmcia_find_mem_region(req->Base, req->Size, align, 0, s); if (!win->res) { - dev_dbg(&s->dev, "allocating mem region failed\n"); + dev_dbg(&p_dev->dev, "allocating mem region failed\n"); mutex_unlock(&s->ops_mutex); return -EINVAL; } @@ -851,7 +854,7 @@ int pcmcia_request_window(struct pcmcia_device *p_dev, win_req_t *req, window_ha win->card_start = 0; if (s->ops->set_mem_map(s, win) != 0) { - dev_dbg(&s->dev, "failed to set memory mapping\n"); + dev_dbg(&p_dev->dev, "failed to set memory mapping\n"); mutex_unlock(&s->ops_mutex); return -EIO; } @@ -874,7 +877,7 @@ int pcmcia_request_window(struct pcmcia_device *p_dev, win_req_t *req, window_ha if (win->res) request_resource(&iomem_resource, res); - dev_dbg(&s->dev, "request_window results in %pR\n", res); + dev_dbg(&p_dev->dev, "request_window results in %pR\n", res); mutex_unlock(&s->ops_mutex); *wh = res; diff --git a/drivers/power/apm_power.c b/drivers/power/apm_power.c index 936bae5..dc628cb 100644 --- a/drivers/power/apm_power.c +++ b/drivers/power/apm_power.c @@ -233,6 +233,7 @@ static int calculate_capacity(enum apm_source source) empty_design_prop = POWER_SUPPLY_PROP_ENERGY_EMPTY_DESIGN; now_prop = POWER_SUPPLY_PROP_ENERGY_NOW; avg_prop = POWER_SUPPLY_PROP_ENERGY_AVG; + break; case SOURCE_VOLTAGE: full_prop = POWER_SUPPLY_PROP_VOLTAGE_MAX; empty_prop = POWER_SUPPLY_PROP_VOLTAGE_MIN; diff --git a/drivers/power/intel_mid_battery.c b/drivers/power/intel_mid_battery.c index c61ffec..2a10cd3 100644 --- a/drivers/power/intel_mid_battery.c +++ b/drivers/power/intel_mid_battery.c @@ -185,8 +185,8 @@ static int pmic_scu_ipc_battery_property_get(struct battery_property *prop) { u32 data[3]; u8 *p = (u8 *)&data[1]; - int err = intel_scu_ipc_command(IPC_CMD_BATTERY_PROPERTY, - IPCMSG_BATTERY, NULL, 0, data, 3); + int err = intel_scu_ipc_command(IPCMSG_BATTERY, + IPC_CMD_BATTERY_PROPERTY, NULL, 0, data, 3); prop->capacity = data[0]; prop->crnt = *p++; @@ -207,7 
+207,7 @@ static int pmic_scu_ipc_battery_property_get(struct battery_property *prop) static int pmic_scu_ipc_set_charger(int charger) { - return intel_scu_ipc_simple_command(charger, IPCMSG_BATTERY); + return intel_scu_ipc_simple_command(IPCMSG_BATTERY, charger); } /** diff --git a/drivers/regulator/88pm8607.c b/drivers/regulator/88pm8607.c index 7d149a8..2ce2eb7 100644 --- a/drivers/regulator/88pm8607.c +++ b/drivers/regulator/88pm8607.c @@ -215,7 +215,7 @@ static int pm8607_list_voltage(struct regulator_dev *rdev, unsigned index) struct pm8607_regulator_info *info = rdev_get_drvdata(rdev); int ret = -EINVAL; - if (info->vol_table && (index < (2 << info->vol_nbits))) { + if (info->vol_table && (index < (1 << info->vol_nbits))) { ret = info->vol_table[index]; if (info->slope_double) ret <<= 1; @@ -233,7 +233,7 @@ static int choose_voltage(struct regulator_dev *rdev, int min_uV, int max_uV) max_uV = max_uV >> 1; } if (info->vol_table) { - for (i = 0; i < (2 << info->vol_nbits); i++) { + for (i = 0; i < (1 << info->vol_nbits); i++) { if (!info->vol_table[i]) break; if ((min_uV <= info->vol_table[i]) diff --git a/drivers/regulator/ab3100.c b/drivers/regulator/ab3100.c index 1179099..b349266 100644 --- a/drivers/regulator/ab3100.c +++ b/drivers/regulator/ab3100.c @@ -634,12 +634,9 @@ static int __devinit ab3100_regulators_probe(struct platform_device *pdev) "%s: failed to register regulator %s err %d\n", __func__, ab3100_regulator_desc[i].name, err); - i--; /* remove the already registered regulators */ - while (i > 0) { + while (--i >= 0) regulator_unregister(ab3100_regulators[i].rdev); - i--; - } return err; } diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c index dc3f1a4..28c7ae6 100644 --- a/drivers/regulator/ab8500.c +++ b/drivers/regulator/ab8500.c @@ -157,7 +157,7 @@ static int ab8500_list_voltage(struct regulator_dev *rdev, unsigned selector) if (info->fixed_uV) return info->fixed_uV; - if (selector > info->voltages_len) + if (selector >= info->voltages_len) return -EINVAL; return info->supported_voltages[selector]; @@ -344,13 +344,14 @@ static inline struct ab8500_regulator_info *find_regulator_info(int id) static __devinit int ab8500_regulator_probe(struct platform_device *pdev) { struct ab8500 *ab8500 = dev_get_drvdata(pdev->dev.parent); - struct ab8500_platform_data *pdata = dev_get_platdata(ab8500->dev); + struct ab8500_platform_data *pdata; int i, err; if (!ab8500) { dev_err(&pdev->dev, "null mfd parent\n"); return -EINVAL; } + pdata = dev_get_platdata(ab8500->dev); /* register all regulators */ for (i = 0; i < ARRAY_SIZE(ab8500_regulator_info); i++) { @@ -368,11 +369,9 @@ static __devinit int ab8500_regulator_probe(struct platform_device *pdev) dev_err(&pdev->dev, "failed to register regulator %s\n", info->desc.name); /* when we fail, un-register all earlier regulators */ - i--; - while (i > 0) { + while (--i >= 0) { info = &ab8500_regulator_info[i]; regulator_unregister(info->regulator); - i--; } return err; } diff --git a/drivers/regulator/ad5398.c b/drivers/regulator/ad5398.c index d59d2f2..df1fb53 100644 --- a/drivers/regulator/ad5398.c +++ b/drivers/regulator/ad5398.c @@ -25,7 +25,7 @@ struct ad5398_chip_info { unsigned int current_level; unsigned int current_mask; unsigned int current_offset; - struct regulator_dev rdev; + struct regulator_dev *rdev; }; static int ad5398_calc_current(struct ad5398_chip_info *chip, @@ -211,7 +211,6 @@ MODULE_DEVICE_TABLE(i2c, ad5398_id); static int __devinit ad5398_probe(struct i2c_client *client, const struct 
i2c_device_id *id) { - struct regulator_dev *rdev; struct regulator_init_data *init_data = client->dev.platform_data; struct ad5398_chip_info *chip; const struct ad5398_current_data_format *df = @@ -233,9 +232,10 @@ static int __devinit ad5398_probe(struct i2c_client *client, chip->current_offset = df->current_offset; chip->current_mask = (chip->current_level - 1) << chip->current_offset; - rdev = regulator_register(&ad5398_reg, &client->dev, init_data, chip); - if (IS_ERR(rdev)) { - ret = PTR_ERR(rdev); + chip->rdev = regulator_register(&ad5398_reg, &client->dev, + init_data, chip); + if (IS_ERR(chip->rdev)) { + ret = PTR_ERR(chip->rdev); dev_err(&client->dev, "failed to register %s %s\n", id->name, ad5398_reg.name); goto err; @@ -254,7 +254,7 @@ static int __devexit ad5398_remove(struct i2c_client *client) { struct ad5398_chip_info *chip = i2c_get_clientdata(client); - regulator_unregister(&chip->rdev); + regulator_unregister(chip->rdev); kfree(chip); i2c_set_clientdata(client, NULL); diff --git a/drivers/regulator/isl6271a-regulator.c b/drivers/regulator/isl6271a-regulator.c index e49d2bd..d61ecb8 100644 --- a/drivers/regulator/isl6271a-regulator.c +++ b/drivers/regulator/isl6271a-regulator.c @@ -165,7 +165,7 @@ static int __devinit isl6271a_probe(struct i2c_client *i2c, mutex_init(&pmic->mtx); for (i = 0; i < 3; i++) { - pmic->rdev[i] = regulator_register(&isl_rd[0], &i2c->dev, + pmic->rdev[i] = regulator_register(&isl_rd[i], &i2c->dev, init_data, pmic); if (IS_ERR(pmic->rdev[i])) { dev_err(&i2c->dev, "failed to register %s\n", id->name); diff --git a/drivers/regulator/max1586.c b/drivers/regulator/max1586.c index 8867c27..559cfa2 100644 --- a/drivers/regulator/max1586.c +++ b/drivers/regulator/max1586.c @@ -121,14 +121,14 @@ static int max1586_v6_set(struct regulator_dev *rdev, int min_uV, int max_uV) if (max_uV < MAX1586_V6_MIN_UV || max_uV > MAX1586_V6_MAX_UV) return -EINVAL; - if (min_uV >= 3000000) - selector = 3; - if (min_uV < 3000000) - selector = 2; - if (min_uV < 2500000) - selector = 1; if (min_uV < 1800000) selector = 0; + else if (min_uV < 2500000) + selector = 1; + else if (min_uV < 3000000) + selector = 2; + else if (min_uV >= 3000000) + selector = 3; if (max1586_v6_calc_voltage(selector) > max_uV) return -EINVAL; diff --git a/drivers/regulator/max8998.c b/drivers/regulator/max8998.c index ab67298..a1baf1f 100644 --- a/drivers/regulator/max8998.c +++ b/drivers/regulator/max8998.c @@ -549,7 +549,7 @@ static __devinit int max8998_pmic_probe(struct platform_device *pdev) if (!max8998) return -ENOMEM; - size = sizeof(struct regulator_dev *) * (pdata->num_regulators + 1); + size = sizeof(struct regulator_dev *) * pdata->num_regulators; max8998->rdev = kzalloc(size, GFP_KERNEL); if (!max8998->rdev) { kfree(max8998); @@ -557,7 +557,9 @@ static __devinit int max8998_pmic_probe(struct platform_device *pdev) } rdev = max8998->rdev; + max8998->dev = &pdev->dev; max8998->iodev = iodev; + max8998->num_regulators = pdata->num_regulators; platform_set_drvdata(pdev, max8998); for (i = 0; i < pdata->num_regulators; i++) { @@ -583,7 +585,7 @@ static __devinit int max8998_pmic_probe(struct platform_device *pdev) return 0; err: - for (i = 0; i <= max8998->num_regulators; i++) + for (i = 0; i < max8998->num_regulators; i++) if (rdev[i]) regulator_unregister(rdev[i]); @@ -599,7 +601,7 @@ static int __devexit max8998_pmic_remove(struct platform_device *pdev) struct regulator_dev **rdev = max8998->rdev; int i; - for (i = 0; i <= max8998->num_regulators; i++) + for (i = 0; i < 
max8998->num_regulators; i++) if (rdev[i]) regulator_unregister(rdev[i]); diff --git a/drivers/regulator/tps6507x-regulator.c b/drivers/regulator/tps6507x-regulator.c index c239f42..020f587 100644 --- a/drivers/regulator/tps6507x-regulator.c +++ b/drivers/regulator/tps6507x-regulator.c @@ -626,12 +626,6 @@ fail: return error; } -/** - * tps6507x_remove - TPS6507x driver i2c remove handler - * @client: i2c driver client device structure - * - * Unregister TPS driver as an i2c client device driver - */ static int __devexit tps6507x_pmic_remove(struct platform_device *pdev) { struct tps6507x_dev *tps6507x_dev = platform_get_drvdata(pdev); diff --git a/drivers/regulator/tps6586x-regulator.c b/drivers/regulator/tps6586x-regulator.c index 8cff141..51237fb 100644 --- a/drivers/regulator/tps6586x-regulator.c +++ b/drivers/regulator/tps6586x-regulator.c @@ -133,7 +133,7 @@ static int tps6586x_ldo_get_voltage(struct regulator_dev *rdev) mask = ((1 << ri->volt_nbits) - 1) << ri->volt_shift; val = (val & mask) >> ri->volt_shift; - if (val > ri->desc.n_voltages) + if (val >= ri->desc.n_voltages) BUG(); return ri->voltages[val] * 1000; @@ -150,7 +150,7 @@ static int tps6586x_dvm_set_voltage(struct regulator_dev *rdev, if (ret) return ret; - return tps6586x_set_bits(parent, ri->go_reg, ri->go_bit); + return tps6586x_set_bits(parent, ri->go_reg, 1 << ri->go_bit); } static int tps6586x_regulator_enable(struct regulator_dev *rdev) diff --git a/drivers/regulator/wm831x-ldo.c b/drivers/regulator/wm831x-ldo.c index e686cdb..9edf8f6 100644 --- a/drivers/regulator/wm831x-ldo.c +++ b/drivers/regulator/wm831x-ldo.c @@ -215,8 +215,7 @@ static int wm831x_gp_ldo_set_mode(struct regulator_dev *rdev, case REGULATOR_MODE_IDLE: ret = wm831x_set_bits(wm831x, ctrl_reg, - WM831X_LDO1_LP_MODE, - WM831X_LDO1_LP_MODE); + WM831X_LDO1_LP_MODE, 0); if (ret < 0) return ret; @@ -225,10 +224,12 @@ static int wm831x_gp_ldo_set_mode(struct regulator_dev *rdev, WM831X_LDO1_ON_MODE); if (ret < 0) return ret; + break; case REGULATOR_MODE_STANDBY: ret = wm831x_set_bits(wm831x, ctrl_reg, - WM831X_LDO1_LP_MODE, 0); + WM831X_LDO1_LP_MODE, + WM831X_LDO1_LP_MODE); if (ret < 0) return ret; diff --git a/drivers/regulator/wm8350-regulator.c b/drivers/regulator/wm8350-regulator.c index 0e6ed7d..fe4b8a8 100644 --- a/drivers/regulator/wm8350-regulator.c +++ b/drivers/regulator/wm8350-regulator.c @@ -1129,7 +1129,7 @@ static unsigned int wm8350_dcdc_get_mode(struct regulator_dev *rdev) mode = REGULATOR_MODE_NORMAL; } else if (!active && !sleep) mode = REGULATOR_MODE_IDLE; - else if (!sleep) + else if (sleep) mode = REGULATOR_MODE_STANDBY; return mode; diff --git a/drivers/rtc/rtc-bfin.c b/drivers/rtc/rtc-bfin.c index 72b2bcc..d4fb82d 100644 --- a/drivers/rtc/rtc-bfin.c +++ b/drivers/rtc/rtc-bfin.c @@ -426,7 +426,7 @@ static int bfin_rtc_suspend(struct platform_device *pdev, pm_message_t state) enable_irq_wake(IRQ_RTC); bfin_rtc_sync_pending(&pdev->dev); } else - bfin_rtc_int_clear(-1); + bfin_rtc_int_clear(0); return 0; } @@ -435,8 +435,17 @@ static int bfin_rtc_resume(struct platform_device *pdev) { if (device_may_wakeup(&pdev->dev)) disable_irq_wake(IRQ_RTC); - else - bfin_write_RTC_ISTAT(-1); + + /* + * Since only some of the RTC bits are maintained externally in the + * Vbat domain, we need to wait for the RTC MMRs to be synced into + * the core after waking up. This happens every RTC 1HZ. Once that + * has happened, we can go ahead and re-enable the important write + * complete interrupt event. 
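+ * The seconds flag is raised at that same 1 Hz rate, so the wait + * below is bounded to roughly one second.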
+ */ + while (!(bfin_read_RTC_ISTAT() & RTC_ISTAT_SEC)) + continue; + bfin_rtc_int_set(RTC_ISTAT_WRITE_COMPLETE); return 0; } diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c index 66377f3..d60557c 100644 --- a/drivers/rtc/rtc-m41t80.c +++ b/drivers/rtc/rtc-m41t80.c @@ -364,7 +364,7 @@ static int m41t80_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *t) t->time.tm_isdst = -1; t->enabled = !!(reg[M41T80_REG_ALARM_MON] & M41T80_ALMON_AFE); t->pending = !!(reg[M41T80_REG_FLAGS] & M41T80_FLAGS_AF); - return rtc_valid_tm(t); + return 0; } static struct rtc_class_ops m41t80_rtc_ops = { diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c index 6c418fe..b7a6690 100644 --- a/drivers/rtc/rtc-pl031.c +++ b/drivers/rtc/rtc-pl031.c @@ -403,7 +403,7 @@ static int pl031_probe(struct amba_device *adev, struct amba_id *id) } if (request_irq(adev->irq[0], pl031_interrupt, - IRQF_DISABLED | IRQF_SHARED, "rtc-pl031", ldata)) { + IRQF_DISABLED, "rtc-pl031", ldata)) { ret = -EIO; goto out_no_irq; } diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 8373ca0..9b106d8 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2197,7 +2197,6 @@ static void dasd_setup_queue(struct dasd_block *block) */ blk_queue_max_segment_size(block->request_queue, PAGE_SIZE); blk_queue_segment_boundary(block->request_queue, PAGE_SIZE - 1); - blk_queue_ordered(block->request_queue, QUEUE_ORDERED_DRAIN); } /* diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c index b7de025..85cf607 100644 --- a/drivers/s390/char/tape_block.c +++ b/drivers/s390/char/tape_block.c @@ -217,8 +217,7 @@ tapeblock_setup_device(struct tape_device * device) if (!blkdat->request_queue) return -ENOMEM; - elevator_exit(blkdat->request_queue->elevator); - rc = elevator_init(blkdat->request_queue, "noop"); + rc = elevator_change(blkdat->request_queue, "noop"); if (rc) goto cleanup_queue; diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index ae10883..50286d8 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -634,6 +634,7 @@ void zfcp_scsi_set_prot(struct zfcp_adapter *adapter) adapter->adapter_features & FSF_FEATURE_DIX_PROT_TCPIP) { mask |= SHOST_DIX_TYPE1_PROTECTION; scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP); + shost->sg_prot_tablesize = ZFCP_QDIO_MAX_SBALES_PER_REQ / 2; shost->sg_tablesize = ZFCP_QDIO_MAX_SBALES_PER_REQ / 2; shost->max_sectors = ZFCP_QDIO_MAX_SBALES_PER_REQ * 8 / 2; } diff --git a/drivers/scsi/aic7xxx_old.c b/drivers/scsi/aic7xxx_old.c index 93984c9..aee73fa 100644 --- a/drivers/scsi/aic7xxx_old.c +++ b/drivers/scsi/aic7xxx_old.c @@ -2850,12 +2850,6 @@ aic7xxx_done(struct aic7xxx_host *p, struct aic7xxx_scb *scb) aic_dev->r_total++; ptr = aic_dev->r_bins; } - if(cmd->device->simple_tags && cmd->request->cmd_flags & REQ_HARDBARRIER) - { - aic_dev->barrier_total++; - if(scb->tag_action == MSG_ORDERED_Q_TAG) - aic_dev->ordered_total++; - } x = scb->sg_length; x >>= 10; for(i=0; i<6; i++) @@ -10125,7 +10119,6 @@ static void aic7xxx_buildscb(struct aic7xxx_host *p, struct scsi_cmnd *cmd, struct aic_dev_data *aic_dev = cmd->device->hostdata; struct scsi_device *sdptr = cmd->device; unsigned char tindex = TARGET_INDEX(cmd); - struct request *req = cmd->request; int use_sg; mask = (0x01 << tindex); @@ -10144,19 +10137,8 @@ static void aic7xxx_buildscb(struct aic7xxx_host *p, struct scsi_cmnd *cmd, /* We always force TEST_UNIT_READY to untagged */ if (cmd->cmnd[0] != TEST_UNIT_READY && 
sdptr->simple_tags) { - if (req->cmd_flags & REQ_HARDBARRIER) - { - if(sdptr->ordered_tags) - { - hscb->control |= MSG_ORDERED_Q_TAG; - scb->tag_action = MSG_ORDERED_Q_TAG; - } - } - else - { - hscb->control |= MSG_SIMPLE_Q_TAG; - scb->tag_action = MSG_SIMPLE_Q_TAG; - } + hscb->control |= MSG_SIMPLE_Q_TAG; + scb->tag_action = MSG_SIMPLE_Q_TAG; } } if ( !(aic_dev->dtr_pending) && diff --git a/drivers/scsi/be2iscsi/be_iscsi.c b/drivers/scsi/be2iscsi/be_iscsi.c index 7d4d227..7f11f3e 100644 --- a/drivers/scsi/be2iscsi/be_iscsi.c +++ b/drivers/scsi/be2iscsi/be_iscsi.c @@ -300,8 +300,7 @@ int beiscsi_get_host_param(struct Scsi_Host *shost, enum iscsi_host_param param, char *buf) { struct beiscsi_hba *phba = (struct beiscsi_hba *)iscsi_host_priv(shost); - int len = 0; - int status; + int status = 0; SE_DEBUG(DBG_LVL_8, "In beiscsi_get_host_param, param= %d\n", param); switch (param) { @@ -315,7 +314,7 @@ int beiscsi_get_host_param(struct Scsi_Host *shost, default: return iscsi_host_get_param(shost, param, buf); } - return len; + return status; } int beiscsi_get_macaddr(char *buf, struct beiscsi_hba *phba) diff --git a/drivers/scsi/be2iscsi/be_mgmt.c b/drivers/scsi/be2iscsi/be_mgmt.c index 26350e4..877324f 100644 --- a/drivers/scsi/be2iscsi/be_mgmt.c +++ b/drivers/scsi/be2iscsi/be_mgmt.c @@ -368,7 +368,7 @@ int mgmt_open_connection(struct beiscsi_hba *phba, memset(req, 0, sizeof(*req)); wrb->tag0 |= tag; - be_wrb_hdr_prepare(wrb, sizeof(*req), true, 1); + be_wrb_hdr_prepare(wrb, sizeof(*req), false, 1); be_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_ISCSI, OPCODE_COMMON_ISCSI_TCP_CONNECT_AND_OFFLOAD, sizeof(*req)); diff --git a/drivers/scsi/constants.c b/drivers/scsi/constants.c index cd05e04..d0c8234 100644 --- a/drivers/scsi/constants.c +++ b/drivers/scsi/constants.c @@ -1404,13 +1404,13 @@ void scsi_print_sense(char *name, struct scsi_cmnd *cmd) { struct scsi_sense_hdr sshdr; - scmd_printk(KERN_INFO, cmd, ""); + scmd_printk(KERN_INFO, cmd, " "); scsi_decode_sense_buffer(cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE, &sshdr); scsi_show_sense_hdr(&sshdr); scsi_decode_sense_extras(cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE, &sshdr); - scmd_printk(KERN_INFO, cmd, ""); + scmd_printk(KERN_INFO, cmd, " "); scsi_show_extd_sense(sshdr.asc, sshdr.ascq); } EXPORT_SYMBOL(scsi_print_sense); @@ -1453,7 +1453,7 @@ EXPORT_SYMBOL(scsi_show_result); void scsi_print_result(struct scsi_cmnd *cmd) { - scmd_printk(KERN_INFO, cmd, ""); + scmd_printk(KERN_INFO, cmd, " "); scsi_show_result(cmd->result); } EXPORT_SYMBOL(scsi_print_result); diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 8a8f803..1047815 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -376,6 +376,7 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) shost->this_id = sht->this_id; shost->can_queue = sht->can_queue; shost->sg_tablesize = sht->sg_tablesize; + shost->sg_prot_tablesize = sht->sg_prot_tablesize; shost->cmd_per_lun = sht->cmd_per_lun; shost->unchecked_isa_dma = sht->unchecked_isa_dma; shost->use_clustering = sht->use_clustering; diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 4f5551b..c5d0606 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -3231,6 +3231,12 @@ static __devinit int hpsa_kdump_hard_reset_controller(struct pci_dev *pdev) misc_fw_support = readl(&cfgtable->misc_fw_support); use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET; + /* The doorbell reset seems to cause lockups on some Smart + * Arrays (e.g. P410, P410i, maybe others). 
Until this is + * fixed or at least isolated, avoid the doorbell reset. + */ + use_doorbell = 0; + rc = hpsa_controller_hard_reset(pdev, vaddr, use_doorbell); if (rc) goto unmap_cfgtable; diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index f0cfba9..535085c 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -130,17 +130,6 @@ static void sas_scsi_task_done(struct sas_task *task) sc->scsi_done(sc); } -static enum task_attribute sas_scsi_get_task_attr(struct scsi_cmnd *cmd) -{ - enum task_attribute ta = TASK_ATTR_SIMPLE; - if (cmd->request && blk_rq_tagged(cmd->request)) { - if (cmd->device->ordered_tags && - (cmd->request->cmd_flags & REQ_HARDBARRIER)) - ta = TASK_ATTR_ORDERED; - } - return ta; -} - static struct sas_task *sas_create_task(struct scsi_cmnd *cmd, struct domain_device *dev, gfp_t gfp_flags) @@ -160,7 +149,7 @@ static struct sas_task *sas_create_task(struct scsi_cmnd *cmd, task->ssp_task.retry_count = 1; int_to_scsilun(cmd->device->lun, &lun); memcpy(task->ssp_task.LUN, &lun.scsi_lun, 8); - task->ssp_task.task_attr = sas_scsi_get_task_attr(cmd); + task->ssp_task.task_attr = TASK_ATTR_SIMPLE; memcpy(task->ssp_task.cdb, cmd->cmnd, 16); task->scatter = scsi_sglist(cmd); diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index fda4de3..e88bbdd 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -865,7 +865,7 @@ void osd_req_read(struct osd_request *or, { _osd_req_encode_common(or, OSD_ACT_READ, obj, offset, len); WARN_ON(or->in.bio || or->in.total_bytes); - WARN_ON(1 == (bio->bi_rw & REQ_WRITE)); + WARN_ON(bio->bi_rw & REQ_WRITE); or->in.bio = bio; or->in.total_bytes = len; } diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index c978105..3cd794f 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -1847,26 +1847,33 @@ qla24xx_vport_delete(struct fc_vport *fc_vport) qla24xx_disable_vp(vha); + vha->flags.delete_progress = 1; + fc_remove_host(vha->host); scsi_remove_host(vha->host); - qla2x00_free_fcports(vha); + if (vha->timer_active) { + qla2x00_vp_stop_timer(vha); + DEBUG15(printk(KERN_INFO "scsi(%ld): timer for the vport[%d]" + " = %p has stopped\n", vha->host_no, vha->vp_idx, vha)); + } qla24xx_deallocate_vp_id(vha); + /* No pending activities shall be there on the vha now */ + DEBUG(msleep(random32()%10)); /* Just to see if something falls on + * the net we have placed below */ + + BUG_ON(atomic_read(&vha->vref_count)); + + qla2x00_free_fcports(vha); + mutex_lock(&ha->vport_lock); ha->cur_vport_count--; clear_bit(vha->vp_idx, ha->vp_idx_map); mutex_unlock(&ha->vport_lock); - if (vha->timer_active) { - qla2x00_vp_stop_timer(vha); - DEBUG15(printk ("scsi(%ld): timer for the vport[%d] = %p " - "has stopped\n", - vha->host_no, vha->vp_idx, vha)); - } - if (vha->req->id && !ha->flags.cpu_affinity_enabled) { if (qla25xx_delete_req_que(vha, vha->req) != QLA_SUCCESS) qla_printk(KERN_WARNING, ha, diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h index 6cfc28a..b74e6b5 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.h +++ b/drivers/scsi/qla2xxx/qla_dbg.h @@ -29,8 +29,6 @@ /* #define QL_DEBUG_LEVEL_17 */ /* Output EEH trace messages */ /* #define QL_DEBUG_LEVEL_18 */ /* Output T10 CRC trace messages */ -/* #define QL_PRINTK_BUF */ /* Captures printk to buffer */ - /* * Macros use for debugging the driver. 
*/ diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index df7d74f..e1d3ad40 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -2646,6 +2646,7 @@ struct qla_hw_data { #define MBX_UPDATE_FLASH_ACTIVE 3 struct mutex vport_lock; /* Virtual port synchronization */ + spinlock_t vport_slock; /* order is hardware_lock, then vport_slock */ struct completion mbx_cmd_comp; /* Serialize mbx access */ struct completion mbx_intr_comp; /* Used for completion notification */ struct completion dcbx_comp; /* For set port config notification */ @@ -2833,6 +2834,7 @@ typedef struct scsi_qla_host { uint32_t management_server_logged_in :1; uint32_t process_response_queue :1; uint32_t difdix_supported:1; + uint32_t delete_progress:1; } flags; atomic_t loop_state; @@ -2928,6 +2930,8 @@ typedef struct scsi_qla_host { struct req_que *req; int fw_heartbeat_counter; int seconds_since_last_heartbeat; + + atomic_t vref_count; } scsi_qla_host_t; /* @@ -2938,6 +2942,22 @@ typedef struct scsi_qla_host { test_bit(LOOP_RESYNC_NEEDED, &ha->dpc_flags) || \ atomic_read(&ha->loop_state) == LOOP_DOWN) +#define QLA_VHA_MARK_BUSY(__vha, __bail) do { \ + atomic_inc(&__vha->vref_count); \ + mb(); \ + if (__vha->flags.delete_progress) { \ + atomic_dec(&__vha->vref_count); \ + __bail = 1; \ + } else { \ + __bail = 0; \ + } \ +} while (0) + +#define QLA_VHA_MARK_NOT_BUSY(__vha) do { \ + atomic_dec(&__vha->vref_count); \ +} while (0) + + #define qla_printk(level, ha, format, arg...) \ dev_printk(level , &((ha)->pdev->dev) , format , ## arg) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index c2d7bb8..3cafbef 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -69,21 +69,29 @@ qla2x00_ctx_sp_free(srb_t *sp) { struct srb_ctx *ctx = sp->ctx; struct srb_iocb *iocb = ctx->u.iocb_cmd; + struct scsi_qla_host *vha = sp->fcport->vha; del_timer_sync(&iocb->timer); kfree(iocb); kfree(ctx); mempool_free(sp, sp->fcport->vha->hw->srb_mempool); + + QLA_VHA_MARK_NOT_BUSY(vha); } inline srb_t * qla2x00_get_ctx_sp(scsi_qla_host_t *vha, fc_port_t *fcport, size_t size, unsigned long tmo) { - srb_t *sp; + srb_t *sp = NULL; struct qla_hw_data *ha = vha->hw; struct srb_ctx *ctx; struct srb_iocb *iocb; + uint8_t bail; + + QLA_VHA_MARK_BUSY(vha, bail); + if (bail) + return NULL; sp = mempool_alloc(ha->srb_mempool, GFP_KERNEL); if (!sp) @@ -116,6 +124,8 @@ qla2x00_get_ctx_sp(scsi_qla_host_t *vha, fc_port_t *fcport, size_t size, iocb->timer.function = qla2x00_ctx_sp_timeout; add_timer(&iocb->timer); done: + if (!sp) + QLA_VHA_MARK_NOT_BUSY(vha); return sp; } @@ -1808,11 +1818,15 @@ qla2x00_init_rings(scsi_qla_host_t *vha) qla2x00_init_response_q_entries(rsp); } + spin_lock_irqsave(&ha->vport_slock, flags); /* Clear RSCN queue. */ list_for_each_entry(vp, &ha->vp_list, list) { vp->rscn_in_ptr = 0; vp->rscn_out_ptr = 0; } + + spin_unlock_irqrestore(&ha->vport_slock, flags); + ha->isp_ops->config_rings(vha); spin_unlock_irqrestore(&ha->hardware_lock, flags); @@ -3252,12 +3266,17 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, /* Bypass virtual ports of the same host. 
*/ found = 0; if (ha->num_vhosts) { + unsigned long flags; + + spin_lock_irqsave(&ha->vport_slock, flags); list_for_each_entry_safe(vp, tvp, &ha->vp_list, list) { if (new_fcport->d_id.b24 == vp->d_id.b24) { found = 1; break; } } + spin_unlock_irqrestore(&ha->vport_slock, flags); + if (found) continue; } @@ -3377,6 +3396,7 @@ qla2x00_find_new_loop_id(scsi_qla_host_t *vha, fc_port_t *dev) struct qla_hw_data *ha = vha->hw; struct scsi_qla_host *vp; struct scsi_qla_host *tvp; + unsigned long flags = 0; rval = QLA_SUCCESS; @@ -3401,6 +3421,8 @@ qla2x00_find_new_loop_id(scsi_qla_host_t *vha, fc_port_t *dev) /* Check for loop ID being already in use. */ found = 0; fcport = NULL; + + spin_lock_irqsave(&ha->vport_slock, flags); list_for_each_entry_safe(vp, tvp, &ha->vp_list, list) { list_for_each_entry(fcport, &vp->vp_fcports, list) { if (fcport->loop_id == dev->loop_id && @@ -3413,6 +3435,7 @@ qla2x00_find_new_loop_id(scsi_qla_host_t *vha, fc_port_t *dev) if (found) break; } + spin_unlock_irqrestore(&ha->vport_slock, flags); /* If not in use then it is free to use. */ if (!found) { @@ -3825,14 +3848,27 @@ void qla2x00_update_fcports(scsi_qla_host_t *base_vha) { fc_port_t *fcport; - struct scsi_qla_host *tvp, *vha; + struct scsi_qla_host *vha; + struct qla_hw_data *ha = base_vha->hw; + unsigned long flags; + spin_lock_irqsave(&ha->vport_slock, flags); /* Go with deferred removal of rport references. */ - list_for_each_entry_safe(vha, tvp, &base_vha->hw->vp_list, list) - list_for_each_entry(fcport, &vha->vp_fcports, list) + list_for_each_entry(vha, &base_vha->hw->vp_list, list) { + atomic_inc(&vha->vref_count); + list_for_each_entry(fcport, &vha->vp_fcports, list) { if (fcport && fcport->drport && - atomic_read(&fcport->state) != FCS_UNCONFIGURED) + atomic_read(&fcport->state) != FCS_UNCONFIGURED) { + spin_unlock_irqrestore(&ha->vport_slock, flags); + qla2x00_rport_del(fcport); + + spin_lock_irqsave(&ha->vport_slock, flags); + } + } + atomic_dec(&vha->vref_count); + } + spin_unlock_irqrestore(&ha->vport_slock, flags); } void @@ -3840,7 +3876,7 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha) { struct qla_hw_data *ha = vha->hw; struct scsi_qla_host *vp, *base_vha = pci_get_drvdata(ha->pdev); - struct scsi_qla_host *tvp; + unsigned long flags; vha->flags.online = 0; ha->flags.chip_reset_done = 0; @@ -3858,8 +3894,18 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha) if (atomic_read(&vha->loop_state) != LOOP_DOWN) { atomic_set(&vha->loop_state, LOOP_DOWN); qla2x00_mark_all_devices_lost(vha, 0); - list_for_each_entry_safe(vp, tvp, &base_vha->hw->vp_list, list) + + spin_lock_irqsave(&ha->vport_slock, flags); + list_for_each_entry(vp, &base_vha->hw->vp_list, list) { + atomic_inc(&vp->vref_count); + spin_unlock_irqrestore(&ha->vport_slock, flags); + qla2x00_mark_all_devices_lost(vp, 0); + + spin_lock_irqsave(&ha->vport_slock, flags); + atomic_dec(&vp->vref_count); + } + spin_unlock_irqrestore(&ha->vport_slock, flags); } else { if (!atomic_read(&vha->loop_down_timer)) atomic_set(&vha->loop_down_timer, @@ -3898,8 +3944,8 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) uint8_t status = 0; struct qla_hw_data *ha = vha->hw; struct scsi_qla_host *vp; - struct scsi_qla_host *tvp; struct req_que *req = ha->req_q_map[0]; + unsigned long flags; if (vha->flags.online) { qla2x00_abort_isp_cleanup(vha); @@ -4006,10 +4052,21 @@ qla2x00_abort_isp(scsi_qla_host_t *vha) DEBUG(printk(KERN_INFO "qla2x00_abort_isp(%ld): succeeded.\n", vha->host_no)); - list_for_each_entry_safe(vp, tvp, &ha->vp_list, list) { - if (vp->vp_idx) + + 
spin_lock_irqsave(&ha->vport_slock, flags); + list_for_each_entry(vp, &ha->vp_list, list) { + if (vp->vp_idx) { + atomic_inc(&vp->vref_count); + spin_unlock_irqrestore(&ha->vport_slock, flags); + qla2x00_vp_abort_isp(vp); + + spin_lock_irqsave(&ha->vport_slock, flags); + atomic_dec(&vp->vref_count); + } } + spin_unlock_irqrestore(&ha->vport_slock, flags); + } else { qla_printk(KERN_INFO, ha, "qla2x00_abort_isp: **** FAILED ****\n"); @@ -5221,7 +5278,7 @@ qla82xx_restart_isp(scsi_qla_host_t *vha) struct req_que *req = ha->req_q_map[0]; struct rsp_que *rsp = ha->rsp_q_map[0]; struct scsi_qla_host *vp; - struct scsi_qla_host *tvp; + unsigned long flags; status = qla2x00_init_rings(vha); if (!status) { @@ -5308,10 +5365,21 @@ qla82xx_restart_isp(scsi_qla_host_t *vha) DEBUG(printk(KERN_INFO "qla82xx_restart_isp(%ld): succeeded.\n", vha->host_no)); - list_for_each_entry_safe(vp, tvp, &ha->vp_list, list) { - if (vp->vp_idx) + + spin_lock_irqsave(&ha->vport_slock, flags); + list_for_each_entry(vp, &ha->vp_list, list) { + if (vp->vp_idx) { + atomic_inc(&vp->vref_count); + spin_unlock_irqrestore(&ha->vport_slock, flags); + qla2x00_vp_abort_isp(vp); + + spin_lock_irqsave(&ha->vport_slock, flags); + atomic_dec(&vp->vref_count); + } } + spin_unlock_irqrestore(&ha->vport_slock, flags); + } else { qla_printk(KERN_INFO, ha, "qla82xx_restart_isp: **** FAILED ****\n"); diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index bb4d63a..e0e43d9 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1712,19 +1712,20 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) cp->result = DID_ERROR << 16; break; } - } else if (!lscsi_status) { + } else { DEBUG2(qla_printk(KERN_INFO, ha, "scsi(%ld:%d:%d) Dropped frame(s) detected (0x%x " "of 0x%x bytes).\n", vha->host_no, cp->device->id, cp->device->lun, resid, scsi_bufflen(cp))); - cp->result = DID_ERROR << 16; - break; + cp->result = DID_ERROR << 16 | lscsi_status; + goto check_scsi_status; } cp->result = DID_OK << 16 | lscsi_status; logit = 0; +check_scsi_status: /* * Check to see if SCSI Status is non zero. If so report SCSI * Status. 
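The qla2xxx hunks above and below all convert the same idiom: vp_list walks that used list_for_each_entry_safe() with no locking now take the new vport_slock, pin the current vport with vref_count, and drop the lock around the per-vport work, which may sleep or take other locks. A minimal sketch of the pattern, reusing the qla2xxx types from the qla_def.h hunk above; do_vport_work() is a hypothetical stand-in for calls such as qla2x00_vp_abort_isp():

    static void walk_vports(struct qla_hw_data *ha)
    {
            struct scsi_qla_host *vp;
            unsigned long flags;

            spin_lock_irqsave(&ha->vport_slock, flags);
            list_for_each_entry(vp, &ha->vp_list, list) {
                    /* Pin the vport so deletion has to wait for us. */
                    atomic_inc(&vp->vref_count);
                    spin_unlock_irqrestore(&ha->vport_slock, flags);

                    do_vport_work(vp);      /* may sleep */

                    spin_lock_irqsave(&ha->vport_slock, flags);
                    atomic_dec(&vp->vref_count);
            }
            spin_unlock_irqrestore(&ha->vport_slock, flags);
    }

Resuming the walk after re-taking the lock is safe only because the delete path (qla24xx_deallocate_vp_id() in the qla_mid.c hunk below) waits for vref_count to drain before unlinking a vport, so a pinned entry cannot vanish under the iterator.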
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 5202408..effd8a1 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -2913,7 +2913,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, uint16_t stat = le16_to_cpu(rptid_entry->vp_idx); struct qla_hw_data *ha = vha->hw; scsi_qla_host_t *vp; - scsi_qla_host_t *tvp; + unsigned long flags; if (rptid_entry->entry_status != 0) return; @@ -2945,9 +2945,12 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, return; } - list_for_each_entry_safe(vp, tvp, &ha->vp_list, list) + spin_lock_irqsave(&ha->vport_slock, flags); + list_for_each_entry(vp, &ha->vp_list, list) if (vp_idx == vp->vp_idx) break; + spin_unlock_irqrestore(&ha->vport_slock, flags); + if (!vp) return; diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index 987c5b0..2b69392 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -30,6 +30,7 @@ qla24xx_allocate_vp_id(scsi_qla_host_t *vha) { uint32_t vp_id; struct qla_hw_data *ha = vha->hw; + unsigned long flags; /* Find an empty slot and assign an vp_id */ mutex_lock(&ha->vport_lock); @@ -44,7 +45,11 @@ qla24xx_allocate_vp_id(scsi_qla_host_t *vha) set_bit(vp_id, ha->vp_idx_map); ha->num_vhosts++; vha->vp_idx = vp_id; + + spin_lock_irqsave(&ha->vport_slock, flags); list_add_tail(&vha->list, &ha->vp_list); + spin_unlock_irqrestore(&ha->vport_slock, flags); + mutex_unlock(&ha->vport_lock); return vp_id; } @@ -54,12 +59,31 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha) { uint16_t vp_id; struct qla_hw_data *ha = vha->hw; + unsigned long flags = 0; mutex_lock(&ha->vport_lock); + /* + * Wait for all pending activities to finish before removing vport from + * the list. + * Lock needs to be held for safe removal from the list (it + * ensures no active vp_list traversal while the vport is removed + * from the queue) + */ + spin_lock_irqsave(&ha->vport_slock, flags); + while (atomic_read(&vha->vref_count)) { + spin_unlock_irqrestore(&ha->vport_slock, flags); + + msleep(500); + + spin_lock_irqsave(&ha->vport_slock, flags); + } + list_del(&vha->list); + spin_unlock_irqrestore(&ha->vport_slock, flags); + vp_id = vha->vp_idx; ha->num_vhosts--; clear_bit(vp_id, ha->vp_idx_map); - list_del(&vha->list); + mutex_unlock(&ha->vport_lock); } @@ -68,12 +92,17 @@ qla24xx_find_vhost_by_name(struct qla_hw_data *ha, uint8_t *port_name) { scsi_qla_host_t *vha; struct scsi_qla_host *tvha; + unsigned long flags; + spin_lock_irqsave(&ha->vport_slock, flags); /* Locate matching device in database. */ list_for_each_entry_safe(vha, tvha, &ha->vp_list, list) { - if (!memcmp(port_name, vha->port_name, WWN_SIZE)) + if (!memcmp(port_name, vha->port_name, WWN_SIZE)) { + spin_unlock_irqrestore(&ha->vport_slock, flags); return vha; + } } + spin_unlock_irqrestore(&ha->vport_slock, flags); return NULL; } @@ -93,6 +122,12 @@ qla24xx_find_vhost_by_name(struct qla_hw_data *ha, uint8_t *port_name) static void qla2x00_mark_vp_devices_dead(scsi_qla_host_t *vha) { + /* + * !!! NOTE !!! + * This function, if called in contexts other than vp create, disable + * or delete, please make sure this is synchronized with the + * delete thread. 
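+ * (QLA_VHA_MARK_BUSY()/QLA_VHA_MARK_NOT_BUSY() provide such + * synchronization: they hold vref_count, which the delete path + * above waits on before unlinking the vport.)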
+ */ fc_port_t *fcport; list_for_each_entry(fcport, &vha->vp_fcports, list) { @@ -100,7 +135,6 @@ qla2x00_mark_vp_devices_dead(scsi_qla_host_t *vha) "loop_id=0x%04x :%x\n", vha->host_no, fcport->loop_id, fcport->vp_idx)); - atomic_set(&fcport->state, FCS_DEVICE_DEAD); qla2x00_mark_device_lost(vha, fcport, 0, 0); atomic_set(&fcport->state, FCS_UNCONFIGURED); } @@ -194,12 +228,17 @@ qla24xx_configure_vp(scsi_qla_host_t *vha) void qla2x00_alert_all_vps(struct rsp_que *rsp, uint16_t *mb) { - scsi_qla_host_t *vha, *tvha; + scsi_qla_host_t *vha; struct qla_hw_data *ha = rsp->hw; int i = 0; + unsigned long flags; - list_for_each_entry_safe(vha, tvha, &ha->vp_list, list) { + spin_lock_irqsave(&ha->vport_slock, flags); + list_for_each_entry(vha, &ha->vp_list, list) { if (vha->vp_idx) { + atomic_inc(&vha->vref_count); + spin_unlock_irqrestore(&ha->vport_slock, flags); + switch (mb[0]) { case MBA_LIP_OCCURRED: case MBA_LOOP_UP: @@ -215,9 +254,13 @@ qla2x00_alert_all_vps(struct rsp_que *rsp, uint16_t *mb) qla2x00_async_event(vha, rsp, mb); break; } + + spin_lock_irqsave(&ha->vport_slock, flags); + atomic_dec(&vha->vref_count); } i++; } + spin_unlock_irqrestore(&ha->vport_slock, flags); } int @@ -297,7 +340,7 @@ qla2x00_do_dpc_all_vps(scsi_qla_host_t *vha) int ret; struct qla_hw_data *ha = vha->hw; scsi_qla_host_t *vp; - struct scsi_qla_host *tvp; + unsigned long flags = 0; if (vha->vp_idx) return; @@ -309,10 +352,19 @@ qla2x00_do_dpc_all_vps(scsi_qla_host_t *vha) if (!(ha->current_topology & ISP_CFG_F)) return; - list_for_each_entry_safe(vp, tvp, &ha->vp_list, list) { - if (vp->vp_idx) + spin_lock_irqsave(&ha->vport_slock, flags); + list_for_each_entry(vp, &ha->vp_list, list) { + if (vp->vp_idx) { + atomic_inc(&vp->vref_count); + spin_unlock_irqrestore(&ha->vport_slock, flags); + ret = qla2x00_do_dpc_vp(vp); + + spin_lock_irqsave(&ha->vport_slock, flags); + atomic_dec(&vp->vref_count); + } } + spin_unlock_irqrestore(&ha->vport_slock, flags); } int diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c index 8d8e40b..8d9edfb 100644 --- a/drivers/scsi/qla2xxx/qla_nx.c +++ b/drivers/scsi/qla2xxx/qla_nx.c @@ -2681,6 +2681,19 @@ qla82xx_start_scsi(srb_t *sp) sufficient_dsds: req_cnt = 1; + if (req->cnt < (req_cnt + 2)) { + cnt = (uint16_t)RD_REG_DWORD_RELAXED( + &reg->req_q_out[0]); + if (req->ring_index < cnt) + req->cnt = cnt - req->ring_index; + else + req->cnt = req->length - + (req->ring_index - cnt); + } + + if (req->cnt < (req_cnt + 2)) + goto queuing_error; + ctx = sp->ctx = mempool_alloc(ha->ctx_mempool, GFP_ATOMIC); if (!sp->ctx) { DEBUG(printk(KERN_INFO @@ -3352,16 +3365,19 @@ qla82xx_check_fw_alive(scsi_qla_host_t *vha) set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); } qla2xxx_wake_dpc(vha); + ha->flags.fw_hung = 1; if (ha->flags.mbox_busy) { - ha->flags.fw_hung = 1; ha->flags.mbox_int = 1; DEBUG2(qla_printk(KERN_ERR, ha, - "Due to fw hung, doing premature " - "completion of mbx command\n")); - complete(&ha->mbx_intr_comp); + "Due to fw hung, doing premature " + "completion of mbx command\n")); + if (test_bit(MBX_INTR_WAIT, + &ha->mbx_cmd_flags)) + complete(&ha->mbx_intr_comp); } } - } + } else + vha->seconds_since_last_heartbeat = 0; vha->fw_heartbeat_counter = fw_heartbeat_counter; } @@ -3463,13 +3479,15 @@ void qla82xx_watchdog(scsi_qla_host_t *vha) "%s(): Adapter reset needed!\n", __func__); set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); qla2xxx_wake_dpc(vha); + ha->flags.fw_hung = 1; if (ha->flags.mbox_busy) { - ha->flags.fw_hung = 1; ha->flags.mbox_int = 1; 
DEBUG2(qla_printk(KERN_ERR, ha, - "Need reset, doing premature " - "completion of mbx command\n")); - complete(&ha->mbx_intr_comp); + "Need reset, doing premature " + "completion of mbx command\n")); + if (test_bit(MBX_INTR_WAIT, + &ha->mbx_cmd_flags)) + complete(&ha->mbx_intr_comp); } } else { qla82xx_check_fw_alive(vha); diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index bdd53f0..318235f 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -2344,16 +2344,28 @@ probe_out: static void qla2x00_remove_one(struct pci_dev *pdev) { - scsi_qla_host_t *base_vha, *vha, *temp; + scsi_qla_host_t *base_vha, *vha; struct qla_hw_data *ha; + unsigned long flags; base_vha = pci_get_drvdata(pdev); ha = base_vha->hw; - list_for_each_entry_safe(vha, temp, &ha->vp_list, list) { - if (vha && vha->fc_vport) + spin_lock_irqsave(&ha->vport_slock, flags); + list_for_each_entry(vha, &ha->vp_list, list) { + atomic_inc(&vha->vref_count); + + if (vha && vha->fc_vport) { + spin_unlock_irqrestore(&ha->vport_slock, flags); + fc_vport_terminate(vha->fc_vport); + + spin_lock_irqsave(&ha->vport_slock, flags); + } + + atomic_dec(&vha->vref_count); } + spin_unlock_irqrestore(&ha->vport_slock, flags); set_bit(UNLOADING, &base_vha->dpc_flags); @@ -2978,10 +2990,17 @@ static struct qla_work_evt * qla2x00_alloc_work(struct scsi_qla_host *vha, enum qla_work_type type) { struct qla_work_evt *e; + uint8_t bail; + + QLA_VHA_MARK_BUSY(vha, bail); + if (bail) + return NULL; e = kzalloc(sizeof(struct qla_work_evt), GFP_ATOMIC); - if (!e) + if (!e) { + QLA_VHA_MARK_NOT_BUSY(vha); return NULL; + } INIT_LIST_HEAD(&e->list); e->type = type; @@ -3138,6 +3157,9 @@ qla2x00_do_work(struct scsi_qla_host *vha) } if (e->flags & QLA_EVT_FLAG_FREE) kfree(e); + + /* For each work completed decrement vha ref count */ + QLA_VHA_MARK_NOT_BUSY(vha); } } diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h index e75ccb9..8edbccb 100644 --- a/drivers/scsi/qla2xxx/qla_version.h +++ b/drivers/scsi/qla2xxx/qla_version.h @@ -7,9 +7,9 @@ /* * Driver version */ -#define QLA2XXX_VERSION "8.03.03-k0" +#define QLA2XXX_VERSION "8.03.04-k0" #define QLA_DRIVER_MAJOR_VER 8 #define QLA_DRIVER_MINOR_VER 3 -#define QLA_DRIVER_PATCH_VER 3 +#define QLA_DRIVER_PATCH_VER 4 #define QLA_DRIVER_BETA_VER 0 diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index ad0ed21..348fba0 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -1046,13 +1046,13 @@ int scsi_get_vpd_page(struct scsi_device *sdev, u8 page, unsigned char *buf, /* If the user actually wanted this page, we can skip the rest */ if (page == 0) - return -EINVAL; + return 0; for (i = 0; i < min((int)buf[3], buf_len - 4); i++) if (buf[i + 4] == page) goto found; - if (i < buf[3] && i > buf_len) + if (i < buf[3] && i >= buf_len - 4) /* ran off the end of the buffer, give us benefit of doubt */ goto found; /* The device claims it doesn't support the requested page */ diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 9ade720..8041fe1 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -968,11 +968,13 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb, */ int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask) { - int error = scsi_init_sgtable(cmd->request, &cmd->sdb, gfp_mask); + struct request *rq = cmd->request; + + int error = scsi_init_sgtable(rq, &cmd->sdb, gfp_mask); if (error) goto err_exit; - if (blk_bidi_rq(cmd->request)) { + if 
(blk_bidi_rq(rq)) { struct scsi_data_buffer *bidi_sdb = kmem_cache_zalloc( scsi_sdb_cache, GFP_ATOMIC); if (!bidi_sdb) { @@ -980,28 +982,28 @@ int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask) goto err_exit; } - cmd->request->next_rq->special = bidi_sdb; - error = scsi_init_sgtable(cmd->request->next_rq, bidi_sdb, - GFP_ATOMIC); + rq->next_rq->special = bidi_sdb; + error = scsi_init_sgtable(rq->next_rq, bidi_sdb, GFP_ATOMIC); if (error) goto err_exit; } - if (blk_integrity_rq(cmd->request)) { + if (blk_integrity_rq(rq)) { struct scsi_data_buffer *prot_sdb = cmd->prot_sdb; int ivecs, count; BUG_ON(prot_sdb == NULL); - ivecs = blk_rq_count_integrity_sg(cmd->request); + ivecs = blk_rq_count_integrity_sg(rq->q, rq->bio); if (scsi_alloc_sgtable(prot_sdb, ivecs, gfp_mask)) { error = BLKPREP_DEFER; goto err_exit; } - count = blk_rq_map_integrity_sg(cmd->request, + count = blk_rq_map_integrity_sg(rq->q, rq->bio, prot_sdb->table.sgl); BUG_ON(unlikely(count > ivecs)); + BUG_ON(unlikely(count > queue_max_integrity_segments(rq->q))); cmd->prot_sdb = prot_sdb; cmd->prot_sdb->table.nents = count; @@ -1011,8 +1013,8 @@ int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask) err_exit: scsi_release_buffers(cmd); - scsi_put_command(cmd); cmd->request->special = NULL; + scsi_put_command(cmd); return error; } EXPORT_SYMBOL(scsi_init_io); @@ -1625,6 +1627,14 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost, blk_queue_max_segments(q, min_t(unsigned short, shost->sg_tablesize, SCSI_MAX_SG_CHAIN_SEGMENTS)); + if (scsi_host_prot_dma(shost)) { + shost->sg_prot_tablesize = + min_not_zero(shost->sg_prot_tablesize, + (unsigned short)SCSI_MAX_PROT_SG_SEGMENTS); + BUG_ON(shost->sg_prot_tablesize < shost->sg_tablesize); + blk_queue_max_integrity_segments(q, shost->sg_prot_tablesize); + } + blk_queue_max_hw_sectors(q, shost->max_sectors); blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost)); blk_queue_segment_boundary(q, shost->dma_boundary); diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index c3f6737..20ad59d 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -251,6 +251,7 @@ shost_rd_attr(host_busy, "%hu\n"); shost_rd_attr(cmd_per_lun, "%hd\n"); shost_rd_attr(can_queue, "%hd\n"); shost_rd_attr(sg_tablesize, "%hu\n"); +shost_rd_attr(sg_prot_tablesize, "%hu\n"); shost_rd_attr(unchecked_isa_dma, "%d\n"); shost_rd_attr(prot_capabilities, "%u\n"); shost_rd_attr(prot_guard_type, "%hd\n"); @@ -262,6 +263,7 @@ static struct attribute *scsi_sysfs_shost_attrs[] = { &dev_attr_cmd_per_lun.attr, &dev_attr_can_queue.attr, &dev_attr_sg_tablesize.attr, + &dev_attr_sg_prot_tablesize.attr, &dev_attr_unchecked_isa_dma.attr, &dev_attr_proc_name.attr, &dev_attr_scan.attr, diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 0c4f89cf..c61934d 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -870,7 +870,7 @@ static int sd_release(struct gendisk *disk, fmode_t mode) SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_release\n")); - if (atomic_dec_return(&sdkp->openers) && sdev->removable) { + if (atomic_dec_return(&sdkp->openers) == 0 && sdev->removable) { if (scsi_block_when_processing_errors(sdev)) scsi_set_medium_removal(sdev, SCSI_REMOVAL_ALLOW); } @@ -2144,7 +2144,7 @@ static int sd_revalidate_disk(struct gendisk *disk) struct scsi_disk *sdkp = scsi_disk(disk); struct scsi_device *sdp = sdkp->device; unsigned char *buffer; - unsigned ordered; + unsigned flush = 0; SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_revalidate_disk\n")); @@ -2187,17 
+2187,15 @@ static int sd_revalidate_disk(struct gendisk *disk) /* * We now have all cache related info, determine how we deal - * with ordered requests. Note that as the current SCSI - * dispatch function can alter request order, we cannot use - * QUEUE_ORDERED_TAG_* even when ordered tag is supported. + * with flush requests. */ - if (sdkp->WCE) - ordered = sdkp->DPOFUA - ? QUEUE_ORDERED_DRAIN_FUA : QUEUE_ORDERED_DRAIN_FLUSH; - else - ordered = QUEUE_ORDERED_DRAIN; + if (sdkp->WCE) { + flush |= REQ_FLUSH; + if (sdkp->DPOFUA) + flush |= REQ_FUA; + } - blk_queue_ordered(sdkp->disk->queue, ordered); + blk_queue_flush(sdkp->disk->queue, flush); set_capacity(disk, sdkp->capacity); kfree(buffer); @@ -2661,15 +2659,15 @@ module_exit(exit_sd); static void sd_print_sense_hdr(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr) { - sd_printk(KERN_INFO, sdkp, ""); + sd_printk(KERN_INFO, sdkp, " "); scsi_show_sense_hdr(sshdr); - sd_printk(KERN_INFO, sdkp, ""); + sd_printk(KERN_INFO, sdkp, " "); scsi_show_extd_sense(sshdr->asc, sshdr->ascq); } static void sd_print_result(struct scsi_disk *sdkp, int result) { - sd_printk(KERN_INFO, sdkp, ""); + sd_printk(KERN_INFO, sdkp, " "); scsi_show_result(result); } diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 78d6163..655ab92 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1657,7 +1657,7 @@ static int sg_start_req(Sg_request *srp, unsigned char *cmd) if (sg_allow_dio && hp->flags & SG_FLAG_DIRECT_IO && dxfer_dir != SG_DXFER_UNKNOWN && !iov_count && !sfp->parentdp->device->host->unchecked_isa_dma && - blk_rq_aligned(q, hp->dxferp, dxfer_len)) + blk_rq_aligned(q, (unsigned long)hp->dxferp, dxfer_len)) md = NULL; else md = &map_data; diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.c b/drivers/scsi/sym53c8xx_2/sym_hipd.c index a7bc8b7..2c3e89d 100644 --- a/drivers/scsi/sym53c8xx_2/sym_hipd.c +++ b/drivers/scsi/sym53c8xx_2/sym_hipd.c @@ -72,10 +72,7 @@ static void sym_printl_hex(u_char *p, int n) static void sym_print_msg(struct sym_ccb *cp, char *label, u_char *msg) { - if (label) - sym_print_addr(cp->cmd, "%s: ", label); - else - sym_print_addr(cp->cmd, ""); + sym_print_addr(cp->cmd, "%s: ", label); spi_print_msg(msg); printf("\n"); @@ -4558,7 +4555,8 @@ static void sym_int_sir(struct sym_hcb *np) switch (np->msgin [2]) { case M_X_MODIFY_DP: if (DEBUG_FLAGS & DEBUG_POINTER) - sym_print_msg(cp, NULL, np->msgin); + sym_print_msg(cp, "extended msg ", + np->msgin); tmp = (np->msgin[3]<<24) + (np->msgin[4]<<16) + (np->msgin[5]<<8) + (np->msgin[6]); sym_modify_dp(np, tp, cp, tmp); @@ -4585,7 +4583,7 @@ static void sym_int_sir(struct sym_hcb *np) */ case M_IGN_RESIDUE: if (DEBUG_FLAGS & DEBUG_POINTER) - sym_print_msg(cp, NULL, np->msgin); + sym_print_msg(cp, "1 or 2 byte ", np->msgin); if (cp->host_flags & HF_SENSE) OUTL_DSP(np, SCRIPTA_BA(np, clrack)); else diff --git a/drivers/serial/bfin_sport_uart.c b/drivers/serial/bfin_sport_uart.c index e57fb3d..5318dd3 100644 --- a/drivers/serial/bfin_sport_uart.c +++ b/drivers/serial/bfin_sport_uart.c @@ -121,7 +121,7 @@ static int sport_uart_setup(struct sport_uart_port *up, int size, int baud_rate) unsigned int sclk = get_sclk(); /* Set TCR1 and TCR2, TFSR is not enabled for uart */ - SPORT_PUT_TCR1(up, (ITFS | TLSBIT | ITCLK)); + SPORT_PUT_TCR1(up, (LATFS | ITFS | TFSR | TLSBIT | ITCLK)); SPORT_PUT_TCR2(up, size + 1); pr_debug("%s TCR1:%x, TCR2:%x\n", __func__, SPORT_GET_TCR1(up), SPORT_GET_TCR2(up)); diff --git a/drivers/serial/mpc52xx_uart.c b/drivers/serial/mpc52xx_uart.c index 8dedb26..c4399e2 100644 
--- a/drivers/serial/mpc52xx_uart.c +++ b/drivers/serial/mpc52xx_uart.c @@ -500,6 +500,7 @@ static int __init mpc512x_psc_fifoc_init(void) psc_fifoc = of_iomap(np, 0); if (!psc_fifoc) { pr_err("%s: Can't map FIFOC\n", __func__); + of_node_put(np); return -ENODEV; } diff --git a/drivers/serial/serial_cs.c b/drivers/serial/serial_cs.c index 141c695..7d475b2 100644 --- a/drivers/serial/serial_cs.c +++ b/drivers/serial/serial_cs.c @@ -335,8 +335,6 @@ static int serial_probe(struct pcmcia_device *link) info->p_dev = link; link->priv = info; - link->resource[0]->flags |= IO_DATA_PATH_WIDTH_8; - link->resource[0]->end = 8; link->conf.Attributes = CONF_ENABLE_IRQ; if (do_sound) { link->conf.Attributes |= CONF_ENABLE_SPKR; @@ -411,6 +409,27 @@ static int setup_serial(struct pcmcia_device *handle, struct serial_info * info, /*====================================================================*/ +static int pfc_config(struct pcmcia_device *p_dev) +{ + unsigned int port = 0; + struct serial_info *info = p_dev->priv; + + if ((p_dev->resource[1]->end != 0) && + (resource_size(p_dev->resource[1]) == 8)) { + port = p_dev->resource[1]->start; + info->slave = 1; + } else if ((info->manfid == MANFID_OSITECH) && + (resource_size(p_dev->resource[0]) == 0x40)) { + port = p_dev->resource[0]->start + 0x28; + info->slave = 1; + } + if (info->slave) + return setup_serial(p_dev, info, port, p_dev->irq); + + dev_warn(&p_dev->dev, "no usable port range found, giving up\n"); + return -ENODEV; +} + static int simple_config_check(struct pcmcia_device *p_dev, cistpl_cftable_entry_t *cf, cistpl_cftable_entry_t *dflt, @@ -461,23 +480,8 @@ static int simple_config(struct pcmcia_device *link) struct serial_info *info = link->priv; int i = -ENODEV, try; - /* If the card is already configured, look up the port and irq */ - if (link->function_config) { - unsigned int port = 0; - if ((link->resource[1]->end != 0) && - (resource_size(link->resource[1]) == 8)) { - port = link->resource[1]->end; - info->slave = 1; - } else if ((info->manfid == MANFID_OSITECH) && - (resource_size(link->resource[0]) == 0x40)) { - port = link->resource[0]->start + 0x28; - info->slave = 1; - } - if (info->slave) { - return setup_serial(link, info, port, - link->irq); - } - } + link->resource[0]->flags |= IO_DATA_PATH_WIDTH_8; + link->resource[0]->end = 8; /* First pass: look for a config entry that looks normal. 
* Two tries: without IO aliases, then with aliases */ @@ -491,8 +495,7 @@ static int simple_config(struct pcmcia_device *link) if (!pcmcia_loop_config(link, simple_config_check_notpicky, NULL)) goto found_port; - printk(KERN_NOTICE - "serial_cs: no usable port range found, giving up\n"); + dev_warn(&link->dev, "no usable port range found, giving up\n"); return -1; found_port: @@ -558,6 +561,7 @@ static int multi_config(struct pcmcia_device *link) int i, base2 = 0; /* First, look for a generic full-sized window */ + link->resource[0]->flags |= IO_DATA_PATH_WIDTH_8; link->resource[0]->end = info->multi * 8; if (pcmcia_loop_config(link, multi_config_check, &base2)) { /* If that didn't work, look for two windows */ @@ -565,15 +569,14 @@ static int multi_config(struct pcmcia_device *link) info->multi = 2; if (pcmcia_loop_config(link, multi_config_check_notpicky, &base2)) { - printk(KERN_NOTICE "serial_cs: no usable port range" + dev_warn(&link->dev, "no usable port range " "found, giving up\n"); return -ENODEV; } } if (!link->irq) - dev_warn(&link->dev, - "serial_cs: no usable IRQ found, continuing...\n"); + dev_warn(&link->dev, "no usable IRQ found, continuing...\n"); /* * Apply any configuration quirks. @@ -675,6 +678,7 @@ static int serial_config(struct pcmcia_device * link) multifunction cards that ask for appropriate IO port ranges */ if ((info->multi == 0) && (link->has_func_id) && + (link->socket->pcmcia_pfc == 0) && ((link->func_id == CISTPL_FUNCID_MULTI) || (link->func_id == CISTPL_FUNCID_SERIAL))) pcmcia_loop_config(link, serial_check_for_multi, info); @@ -685,7 +689,13 @@ static int serial_config(struct pcmcia_device * link) if (info->quirk && info->quirk->multi != -1) info->multi = info->quirk->multi; - if (info->multi > 1) + dev_info(&link->dev, + "trying to set up [0x%04x:0x%04x] (pfc: %d, multi: %d, quirk: %p)\n", + link->manf_id, link->card_id, + link->socket->pcmcia_pfc, info->multi, info->quirk); + if (link->socket->pcmcia_pfc) + i = pfc_config(link); + else if (info->multi > 1) i = multi_config(link); else i = simple_config(link); @@ -704,7 +714,7 @@ static int serial_config(struct pcmcia_device * link) return 0; failed: - dev_warn(&link->dev, "serial_cs: failed to initialize\n"); + dev_warn(&link->dev, "failed to initialize\n"); serial_remove(link); return -ENODEV; } diff --git a/drivers/spi/amba-pl022.c b/drivers/spi/amba-pl022.c index acd35d1..4c37c4e2 100644 --- a/drivers/spi/amba-pl022.c +++ b/drivers/spi/amba-pl022.c @@ -503,8 +503,9 @@ static void giveback(struct pl022 *pl022) msg->state = NULL; if (msg->complete) msg->complete(msg->context); - /* This message is completed, so let's turn off the clock! */ + /* This message is completed, so let's turn off the clocks! 
*/ clk_disable(pl022->clk); + amba_pclk_disable(pl022->adev); } /** @@ -1139,9 +1140,10 @@ static void pump_messages(struct work_struct *work) /* Setup the SPI using the per chip configuration */ pl022->cur_chip = spi_get_ctldata(pl022->cur_msg->spi); /* - * We enable the clock here, then the clock will be disabled when + * We enable the clocks here, then the clocks will be disabled when * giveback() is called in each method (poll/interrupt/DMA) */ + amba_pclk_enable(pl022->adev); clk_enable(pl022->clk); restore_state(pl022); flush(pl022); @@ -1786,11 +1788,9 @@ pl022_probe(struct amba_device *adev, struct amba_id *id) } /* Disable SSP */ - clk_enable(pl022->clk); writew((readw(SSP_CR1(pl022->virtbase)) & (~SSP_CR1_MASK_SSE)), SSP_CR1(pl022->virtbase)); load_ssp_default_config(pl022); - clk_disable(pl022->clk); status = request_irq(adev->irq[0], pl022_interrupt_handler, 0, "pl022", pl022); @@ -1818,6 +1818,8 @@ pl022_probe(struct amba_device *adev, struct amba_id *id) goto err_spi_register; } dev_dbg(dev, "probe succeded\n"); + /* Disable the silicon block pclk and clock it when needed */ + amba_pclk_disable(adev); return 0; err_spi_register: @@ -1879,9 +1881,9 @@ static int pl022_suspend(struct amba_device *adev, pm_message_t state) return status; } - clk_enable(pl022->clk); + amba_pclk_enable(adev); load_ssp_default_config(pl022); - clk_disable(pl022->clk); + amba_pclk_disable(adev); dev_dbg(&adev->dev, "suspended\n"); return 0; } @@ -1981,7 +1983,7 @@ static int __init pl022_init(void) return amba_driver_register(&pl022_driver); } -module_init(pl022_init); +subsys_initcall(pl022_init); static void __exit pl022_exit(void) { diff --git a/drivers/spi/dw_spi.c b/drivers/spi/dw_spi.c index d256cb0..5624785 100644 --- a/drivers/spi/dw_spi.c +++ b/drivers/spi/dw_spi.c @@ -181,10 +181,6 @@ static void flush(struct dw_spi *dws) wait_till_not_busy(dws); } -static void null_cs_control(u32 command) -{ -} - static int null_writer(struct dw_spi *dws) { u8 n_bytes = dws->n_bytes; @@ -322,7 +318,7 @@ static void giveback(struct dw_spi *dws) struct spi_transfer, transfer_list); - if (!last_transfer->cs_change) + if (!last_transfer->cs_change && dws->cs_control) dws->cs_control(MRST_SPI_DEASSERT); msg->state = NULL; @@ -396,6 +392,11 @@ static irqreturn_t interrupt_transfer(struct dw_spi *dws) static irqreturn_t dw_spi_irq(int irq, void *dev_id) { struct dw_spi *dws = dev_id; + u16 irq_status, irq_mask = 0x3f; + + irq_status = dw_readw(dws, isr) & irq_mask; + if (!irq_status) + return IRQ_NONE; if (!dws->cur_msg) { spi_mask_intr(dws, SPI_INT_TXEI); @@ -544,13 +545,13 @@ static void pump_transfers(unsigned long data) */ if (dws->cs_control) { if (dws->rx && dws->tx) - chip->tmode = 0x00; + chip->tmode = SPI_TMOD_TR; else if (dws->rx) - chip->tmode = 0x02; + chip->tmode = SPI_TMOD_RO; else - chip->tmode = 0x01; + chip->tmode = SPI_TMOD_TO; - cr0 &= ~(0x3 << SPI_MODE_OFFSET); + cr0 &= ~SPI_TMOD_MASK; cr0 |= (chip->tmode << SPI_TMOD_OFFSET); } @@ -699,9 +700,6 @@ static int dw_spi_setup(struct spi_device *spi) chip = kzalloc(sizeof(struct chip_data), GFP_KERNEL); if (!chip) return -ENOMEM; - - chip->cs_control = null_cs_control; - chip->enable_dma = 0; } /* @@ -883,7 +881,7 @@ int __devinit dw_spi_add_host(struct dw_spi *dws) dws->dma_inited = 0; dws->dma_addr = (dma_addr_t)(dws->paddr + 0x60); - ret = request_irq(dws->irq, dw_spi_irq, 0, + ret = request_irq(dws->irq, dw_spi_irq, IRQF_SHARED, "dw_spi", dws); if (ret < 0) { dev_err(&master->dev, "can not get IRQ\n"); diff --git a/drivers/spi/spi.c 
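The dw_spi change just above pairs IRQF_SHARED with an early status check, which is the standard contract for a shared interrupt line: a handler must be able to answer "not mine". A hedged sketch of the pattern, where struct my_spi, its regs pointer and the MY_ISR offset are illustrative stand-ins rather than the driver's real names:

        #include <linux/interrupt.h>

        static irqreturn_t my_spi_irq(int irq, void *dev_id)
        {
                struct my_spi *dws = dev_id;    /* hypothetical device state */
                u16 irq_status = readw(dws->regs + MY_ISR) & 0x3f;

                if (!irq_status)
                        return IRQ_NONE;        /* raised by another device on the line */

                /* ... service the TX/RX FIFO events named by irq_status ... */
                return IRQ_HANDLED;
        }

        /* registration: the line may be shared with other devices */
        ret = request_irq(dws->irq, my_spi_irq, IRQF_SHARED, "my_spi", dws);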
b/drivers/spi/spi.c index a9e5c79..0bcf4c1 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -554,11 +554,9 @@ done: EXPORT_SYMBOL_GPL(spi_register_master); -static int __unregister(struct device *dev, void *master_dev) +static int __unregister(struct device *dev, void *null) { - /* note: before about 2.6.14-rc1 this would corrupt memory: */ - if (dev != master_dev) - spi_unregister_device(to_spi_device(dev)); + spi_unregister_device(to_spi_device(dev)); return 0; } @@ -576,8 +574,7 @@ void spi_unregister_master(struct spi_master *master) { int dummy; - dummy = device_for_each_child(master->dev.parent, &master->dev, - __unregister); + dummy = device_for_each_child(&master->dev, NULL, __unregister); device_unregister(&master->dev); } EXPORT_SYMBOL_GPL(spi_unregister_master); diff --git a/drivers/spi/spi_s3c64xx.c b/drivers/spi/spi_s3c64xx.c index 9736581..c3038da 100644 --- a/drivers/spi/spi_s3c64xx.c +++ b/drivers/spi/spi_s3c64xx.c @@ -200,6 +200,9 @@ static void flush_fifo(struct s3c64xx_spi_driver_data *sdd) val = readl(regs + S3C64XX_SPI_STATUS); } while (TX_FIFO_LVL(val, sci) && loops--); + if (loops == 0) + dev_warn(&sdd->pdev->dev, "Timed out flushing TX FIFO\n"); + /* Flush RxFIFO*/ loops = msecs_to_loops(1); do { @@ -210,6 +213,9 @@ static void flush_fifo(struct s3c64xx_spi_driver_data *sdd) break; } while (loops--); + if (loops == 0) + dev_warn(&sdd->pdev->dev, "Timed out flushing RX FIFO\n"); + val = readl(regs + S3C64XX_SPI_CH_CFG); val &= ~S3C64XX_SPI_CH_SW_RST; writel(val, regs + S3C64XX_SPI_CH_CFG); @@ -320,16 +326,17 @@ static int wait_for_xfer(struct s3c64xx_spi_driver_data *sdd, /* millisecs to xfer 'len' bytes @ 'cur_speed' */ ms = xfer->len * 8 * 1000 / sdd->cur_speed; - ms += 5; /* some tolerance */ + ms += 10; /* some tolerance */ if (dma_mode) { val = msecs_to_jiffies(ms) + 10; val = wait_for_completion_timeout(&sdd->xfer_completion, val); } else { + u32 status; val = msecs_to_loops(ms); do { - val = readl(regs + S3C64XX_SPI_STATUS); - } while (RX_FIFO_LVL(val, sci) < xfer->len && --val); + status = readl(regs + S3C64XX_SPI_STATUS); + } while (RX_FIFO_LVL(status, sci) < xfer->len && --val); } if (!val) @@ -447,8 +454,8 @@ static void s3c64xx_spi_config(struct s3c64xx_spi_driver_data *sdd) writel(val, regs + S3C64XX_SPI_CLK_CFG); } -void s3c64xx_spi_dma_rxcb(struct s3c2410_dma_chan *chan, void *buf_id, - int size, enum s3c2410_dma_buffresult res) +static void s3c64xx_spi_dma_rxcb(struct s3c2410_dma_chan *chan, void *buf_id, + int size, enum s3c2410_dma_buffresult res) { struct s3c64xx_spi_driver_data *sdd = buf_id; unsigned long flags; @@ -467,8 +474,8 @@ void s3c64xx_spi_dma_rxcb(struct s3c2410_dma_chan *chan, void *buf_id, spin_unlock_irqrestore(&sdd->lock, flags); } -void s3c64xx_spi_dma_txcb(struct s3c2410_dma_chan *chan, void *buf_id, - int size, enum s3c2410_dma_buffresult res) +static void s3c64xx_spi_dma_txcb(struct s3c2410_dma_chan *chan, void *buf_id, + int size, enum s3c2410_dma_buffresult res) { struct s3c64xx_spi_driver_data *sdd = buf_id; unsigned long flags; @@ -508,8 +515,9 @@ static int s3c64xx_spi_map_mssg(struct s3c64xx_spi_driver_data *sdd, list_for_each_entry(xfer, &msg->transfers, transfer_list) { if (xfer->tx_buf != NULL) { - xfer->tx_dma = dma_map_single(dev, xfer->tx_buf, - xfer->len, DMA_TO_DEVICE); + xfer->tx_dma = dma_map_single(dev, + (void *)xfer->tx_buf, xfer->len, + DMA_TO_DEVICE); if (dma_mapping_error(dev, xfer->tx_dma)) { dev_err(dev, "dma_map_single Tx failed\n"); xfer->tx_dma = XFER_DMAADDR_INVALID; @@ -919,6 +927,13 @@ 
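The s3c64xx mapping hunk above shows the required shape of a dma_map_single() call: the returned handle must be checked with dma_mapping_error() before it is used, and a const TX buffer needs the cast the patch adds. A minimal sketch, with everything outside the DMA API assumed:

        dma_addr_t handle;

        /* map a (possibly const) TX buffer for the device to read */
        handle = dma_map_single(dev, (void *)xfer->tx_buf, xfer->len,
                                DMA_TO_DEVICE);
        if (dma_mapping_error(dev, handle)) {
                dev_err(dev, "dma_map_single Tx failed\n");
                return -ENOMEM;         /* illustrative error handling */
        }
        /* ... run the transfer ... then unmap with the same size/direction */
        dma_unmap_single(dev, handle, xfer->len, DMA_TO_DEVICE);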
static int __init s3c64xx_spi_probe(struct platform_device *pdev) return -ENODEV; } + sci = pdev->dev.platform_data; + if (!sci->src_clk_name) { + dev_err(&pdev->dev, + "Board init must call s3c64xx_spi_set_info()\n"); + return -EINVAL; + } + /* Check for availability of necessary resource */ dmatx_res = platform_get_resource(pdev, IORESOURCE_DMA, 0); @@ -946,8 +961,6 @@ static int __init s3c64xx_spi_probe(struct platform_device *pdev) return -ENOMEM; } - sci = pdev->dev.platform_data; - platform_set_drvdata(pdev, master); sdd = spi_master_get_devdata(master); @@ -1170,7 +1183,7 @@ static int __init s3c64xx_spi_init(void) { return platform_driver_probe(&s3c64xx_spi_driver, s3c64xx_spi_probe); } -module_init(s3c64xx_spi_init); +subsys_initcall(s3c64xx_spi_init); static void __exit s3c64xx_spi_exit(void) { diff --git a/drivers/staging/comedi/drivers/das08_cs.c b/drivers/staging/comedi/drivers/das08_cs.c index c6aa52f..48d9fb1 100644 --- a/drivers/staging/comedi/drivers/das08_cs.c +++ b/drivers/staging/comedi/drivers/das08_cs.c @@ -222,7 +222,6 @@ static int das08_pcmcia_config_loop(struct pcmcia_device *p_dev, p_dev->resource[0]->flags &= ~IO_DATA_PATH_WIDTH; p_dev->resource[0]->flags |= pcmcia_io_cfg_data_width(io->flags); - p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; p_dev->resource[0]->start = io->win[0].base; p_dev->resource[0]->end = io->win[0].len; if (io->nwin > 1) { diff --git a/drivers/staging/hv/netvsc_drv.c b/drivers/staging/hv/netvsc_drv.c index 56e1157..64a0114 100644 --- a/drivers/staging/hv/netvsc_drv.c +++ b/drivers/staging/hv/netvsc_drv.c @@ -327,6 +327,9 @@ static const struct net_device_ops device_ops = { .ndo_stop = netvsc_close, .ndo_start_xmit = netvsc_start_xmit, .ndo_set_multicast_list = netvsc_set_multicast_list, + .ndo_change_mtu = eth_change_mtu, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = eth_mac_addr, }; static int netvsc_probe(struct device *device) diff --git a/drivers/staging/hv/ring_buffer.c b/drivers/staging/hv/ring_buffer.c index 17bc762..d78c569 100644 --- a/drivers/staging/hv/ring_buffer.c +++ b/drivers/staging/hv/ring_buffer.c @@ -193,8 +193,7 @@ Description: static inline u64 GetRingBufferIndices(struct hv_ring_buffer_info *RingInfo) { - return ((u64)RingInfo->RingBuffer->WriteIndex << 32) - || RingInfo->RingBuffer->ReadIndex; + return (u64)RingInfo->RingBuffer->WriteIndex << 32; } diff --git a/drivers/staging/hv/storvsc_api.h b/drivers/staging/hv/storvsc_api.h index 0063bde..8505a1c 100644 --- a/drivers/staging/hv/storvsc_api.h +++ b/drivers/staging/hv/storvsc_api.h @@ -28,10 +28,10 @@ #include "vmbus_api.h" /* Defines */ -#define STORVSC_RING_BUFFER_SIZE (10*PAGE_SIZE) +#define STORVSC_RING_BUFFER_SIZE (20*PAGE_SIZE) #define BLKVSC_RING_BUFFER_SIZE (20*PAGE_SIZE) -#define STORVSC_MAX_IO_REQUESTS 64 +#define STORVSC_MAX_IO_REQUESTS 128 /* * In Hyper-V, each port/path/target maps to 1 scsi host adapter. 
In diff --git a/drivers/staging/hv/storvsc_drv.c b/drivers/staging/hv/storvsc_drv.c index 075b61b..62882a4 100644 --- a/drivers/staging/hv/storvsc_drv.c +++ b/drivers/staging/hv/storvsc_drv.c @@ -495,7 +495,7 @@ static unsigned int copy_to_bounce_buffer(struct scatterlist *orig_sgl, /* ASSERT(orig_sgl[i].offset + orig_sgl[i].length <= PAGE_SIZE); */ - if (j == 0) + if (bounce_addr == 0) bounce_addr = (unsigned long)kmap_atomic(sg_page((&bounce_sgl[j])), KM_IRQ0); while (srclen) { @@ -556,7 +556,7 @@ static unsigned int copy_from_bounce_buffer(struct scatterlist *orig_sgl, destlen = orig_sgl[i].length; /* ASSERT(orig_sgl[i].offset + orig_sgl[i].length <= PAGE_SIZE); */ - if (j == 0) + if (bounce_addr == 0) bounce_addr = (unsigned long)kmap_atomic(sg_page((&bounce_sgl[j])), KM_IRQ0); while (destlen) { @@ -615,6 +615,7 @@ static int storvsc_queuecommand(struct scsi_cmnd *scmnd, unsigned int request_size = 0; int i; struct scatterlist *sgl; + unsigned int sg_count = 0; DPRINT_DBG(STORVSC_DRV, "scmnd %p dir %d, use_sg %d buf %p len %d " "queue depth %d tagged %d", scmnd, scmnd->sc_data_direction, @@ -697,6 +698,7 @@ static int storvsc_queuecommand(struct scsi_cmnd *scmnd, request->DataBuffer.Length = scsi_bufflen(scmnd); if (scsi_sg_count(scmnd)) { sgl = (struct scatterlist *)scsi_sglist(scmnd); + sg_count = scsi_sg_count(scmnd); /* check if we need to bounce the sgl */ if (do_bounce_buffer(sgl, scsi_sg_count(scmnd)) != -1) { @@ -731,15 +733,16 @@ static int storvsc_queuecommand(struct scsi_cmnd *scmnd, scsi_sg_count(scmnd)); sgl = cmd_request->bounce_sgl; + sg_count = cmd_request->bounce_sgl_count; } request->DataBuffer.Offset = sgl[0].offset; - for (i = 0; i < scsi_sg_count(scmnd); i++) { + for (i = 0; i < sg_count; i++) { DPRINT_DBG(STORVSC_DRV, "sgl[%d] len %d offset %d\n", i, sgl[i].length, sgl[i].offset); request->DataBuffer.PfnArray[i] = - page_to_pfn(sg_page((&sgl[i]))); + page_to_pfn(sg_page((&sgl[i]))); } } else if (scsi_sglist(scmnd)) { /* ASSERT(scsi_bufflen(scmnd) <= PAGE_SIZE); */ diff --git a/drivers/staging/octeon/Kconfig b/drivers/staging/octeon/Kconfig index 638ad6b..9493128 100644 --- a/drivers/staging/octeon/Kconfig +++ b/drivers/staging/octeon/Kconfig @@ -1,6 +1,6 @@ config OCTEON_ETHERNET tristate "Cavium Networks Octeon Ethernet support" - depends on CPU_CAVIUM_OCTEON + depends on CPU_CAVIUM_OCTEON && NETDEVICES select PHYLIB select MDIO_OCTEON help diff --git a/drivers/staging/rt2860/usb_main_dev.c b/drivers/staging/rt2860/usb_main_dev.c index a0fe31d..ebf9074 100644 --- a/drivers/staging/rt2860/usb_main_dev.c +++ b/drivers/staging/rt2860/usb_main_dev.c @@ -44,6 +44,7 @@ struct usb_device_id rtusb_usb_id[] = { {USB_DEVICE(0x07B8, 0x2870)}, /* AboCom */ {USB_DEVICE(0x07B8, 0x2770)}, /* AboCom */ {USB_DEVICE(0x0DF6, 0x0039)}, /* Sitecom 2770 */ + {USB_DEVICE(0x0DF6, 0x003F)}, /* Sitecom 2770 */ {USB_DEVICE(0x083A, 0x7512)}, /* Arcadyan 2770 */ {USB_DEVICE(0x0789, 0x0162)}, /* Logitec 2870 */ {USB_DEVICE(0x0789, 0x0163)}, /* Logitec 2870 */ @@ -95,7 +96,8 @@ struct usb_device_id rtusb_usb_id[] = { {USB_DEVICE(0x050d, 0x815c)}, {USB_DEVICE(0x1482, 0x3C09)}, /* Abocom */ {USB_DEVICE(0x14B2, 0x3C09)}, /* Alpha */ - {USB_DEVICE(0x04E8, 0x2018)}, /* samsung */ + {USB_DEVICE(0x04E8, 0x2018)}, /* samsung linkstick2 */ + {USB_DEVICE(0x1690, 0x0740)}, /* Askey */ {USB_DEVICE(0x5A57, 0x0280)}, /* Zinwell */ {USB_DEVICE(0x5A57, 0x0282)}, /* Zinwell */ {USB_DEVICE(0x7392, 0x7718)}, @@ -105,21 +107,34 @@ struct usb_device_id rtusb_usb_id[] = { {USB_DEVICE(0x1737, 0x0071)}, /* Linksys 
WUSB600N */ {USB_DEVICE(0x0411, 0x00e8)}, /* Buffalo WLI-UC-G300N */ {USB_DEVICE(0x050d, 0x815c)}, /* Belkin F5D8053 */ + {USB_DEVICE(0x100D, 0x9031)}, /* Motorola 2770 */ #endif /* RT2870 // */ #ifdef RT3070 {USB_DEVICE(0x148F, 0x3070)}, /* Ralink 3070 */ {USB_DEVICE(0x148F, 0x3071)}, /* Ralink 3071 */ {USB_DEVICE(0x148F, 0x3072)}, /* Ralink 3072 */ {USB_DEVICE(0x0DB0, 0x3820)}, /* Ralink 3070 */ + {USB_DEVICE(0x0DB0, 0x871C)}, /* Ralink 3070 */ + {USB_DEVICE(0x0DB0, 0x822C)}, /* Ralink 3070 */ + {USB_DEVICE(0x0DB0, 0x871B)}, /* Ralink 3070 */ + {USB_DEVICE(0x0DB0, 0x822B)}, /* Ralink 3070 */ {USB_DEVICE(0x0DF6, 0x003E)}, /* Sitecom 3070 */ {USB_DEVICE(0x0DF6, 0x0042)}, /* Sitecom 3072 */ + {USB_DEVICE(0x0DF6, 0x0048)}, /* Sitecom 3070 */ + {USB_DEVICE(0x0DF6, 0x0047)}, /* Sitecom 3071 */ {USB_DEVICE(0x14B2, 0x3C12)}, /* AL 3070 */ {USB_DEVICE(0x18C5, 0x0012)}, /* Corega 3070 */ {USB_DEVICE(0x083A, 0x7511)}, /* Arcadyan 3070 */ + {USB_DEVICE(0x083A, 0xA701)}, /* SMC 3070 */ + {USB_DEVICE(0x083A, 0xA702)}, /* SMC 3072 */ {USB_DEVICE(0x1740, 0x9703)}, /* EnGenius 3070 */ {USB_DEVICE(0x1740, 0x9705)}, /* EnGenius 3071 */ {USB_DEVICE(0x1740, 0x9706)}, /* EnGenius 3072 */ + {USB_DEVICE(0x1740, 0x9707)}, /* EnGenius 3070 */ + {USB_DEVICE(0x1740, 0x9708)}, /* EnGenius 3071 */ + {USB_DEVICE(0x1740, 0x9709)}, /* EnGenius 3072 */ {USB_DEVICE(0x13D3, 0x3273)}, /* AzureWave 3070 */ + {USB_DEVICE(0x13D3, 0x3305)}, /* AzureWave 3070*/ {USB_DEVICE(0x1044, 0x800D)}, /* Gigabyte GN-WB32L 3070 */ {USB_DEVICE(0x2019, 0xAB25)}, /* Planex Communications, Inc. RT3070 */ {USB_DEVICE(0x07B8, 0x3070)}, /* AboCom 3070 */ @@ -132,14 +147,36 @@ struct usb_device_id rtusb_usb_id[] = { {USB_DEVICE(0x07D1, 0x3C0D)}, /* D-Link 3070 */ {USB_DEVICE(0x07D1, 0x3C0E)}, /* D-Link 3070 */ {USB_DEVICE(0x07D1, 0x3C0F)}, /* D-Link 3070 */ + {USB_DEVICE(0x07D1, 0x3C16)}, /* D-Link 3070 */ + {USB_DEVICE(0x07D1, 0x3C17)}, /* D-Link 8070 */ {USB_DEVICE(0x1D4D, 0x000C)}, /* Pegatron Corporation 3070 */ {USB_DEVICE(0x1D4D, 0x000E)}, /* Pegatron Corporation 3070 */ {USB_DEVICE(0x5A57, 0x5257)}, /* Zinwell 3070 */ {USB_DEVICE(0x5A57, 0x0283)}, /* Zinwell 3072 */ {USB_DEVICE(0x04BB, 0x0945)}, /* I-O DATA 3072 */ + {USB_DEVICE(0x04BB, 0x0947)}, /* I-O DATA 3070 */ + {USB_DEVICE(0x04BB, 0x0948)}, /* I-O DATA 3072 */ {USB_DEVICE(0x203D, 0x1480)}, /* Encore 3070 */ + {USB_DEVICE(0x20B8, 0x8888)}, /* PARA INDUSTRIAL 3070 */ + {USB_DEVICE(0x0B05, 0x1784)}, /* Asus 3072 */ + {USB_DEVICE(0x203D, 0x14A9)}, /* Encore 3070*/ + {USB_DEVICE(0x0DB0, 0x899A)}, /* MSI 3070*/ + {USB_DEVICE(0x0DB0, 0x3870)}, /* MSI 3070*/ + {USB_DEVICE(0x0DB0, 0x870A)}, /* MSI 3070*/ + {USB_DEVICE(0x0DB0, 0x6899)}, /* MSI 3070 */ + {USB_DEVICE(0x0DB0, 0x3822)}, /* MSI 3070 */ + {USB_DEVICE(0x0DB0, 0x3871)}, /* MSI 3070 */ + {USB_DEVICE(0x0DB0, 0x871A)}, /* MSI 3070 */ + {USB_DEVICE(0x0DB0, 0x822A)}, /* MSI 3070 */ + {USB_DEVICE(0x0DB0, 0x3821)}, /* Ralink 3070 */ + {USB_DEVICE(0x0DB0, 0x821A)}, /* Ralink 3070 */ + {USB_DEVICE(0x083A, 0xA703)}, /* IO-MAGIC */ + {USB_DEVICE(0x13D3, 0x3307)}, /* Azurewave */ + {USB_DEVICE(0x13D3, 0x3321)}, /* Azurewave */ + {USB_DEVICE(0x07FA, 0x7712)}, /* Edimax */ + {USB_DEVICE(0x0789, 0x0166)}, /* Edimax */ + {USB_DEVICE(0x148F, 0x2070)}, /* Edimax */ #endif /* RT3070 // */ - {USB_DEVICE(0x0DF6, 0x003F)}, /* Sitecom WL-608 */ {USB_DEVICE(0x1737, 0x0077)}, /* Linksys WUSB54GC-EU v3 */ {USB_DEVICE(0x2001, 0x3C09)}, /* D-Link */ {USB_DEVICE(0x2001, 0x3C0A)}, /* D-Link 3072 */ diff --git a/drivers/staging/spectra/Kconfig 
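The long rt2860 hunk above is pure table maintenance: each supported dongle is one USB_DEVICE(vendor, product) entry, the table stays NULL-terminated, and it is exported so the USB core and userspace hotplug can match on it. A generic sketch of the shape (the second ID is invented for illustration):

        static struct usb_device_id example_usb_ids[] = {
                {USB_DEVICE(0x148F, 0x3070)},   /* real Ralink 3070 entry style */
                {USB_DEVICE(0x1234, 0x5678)},   /* hypothetical new dongle */
                { }                             /* terminating entry */
        };
        MODULE_DEVICE_TABLE(usb, example_usb_ids);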
b/drivers/staging/spectra/Kconfig index 5e2ffef..d231ae2 100644 --- a/drivers/staging/spectra/Kconfig +++ b/drivers/staging/spectra/Kconfig @@ -2,6 +2,7 @@ menuconfig SPECTRA tristate "Denali Spectra Flash Translation Layer" depends on BLOCK + depends on X86_MRST default n ---help--- Enable the FTL pseudo-filesystem used with the NAND Flash diff --git a/drivers/staging/spectra/ffsport.c b/drivers/staging/spectra/ffsport.c index 44a7fbe..fa21a0f 100644 --- a/drivers/staging/spectra/ffsport.c +++ b/drivers/staging/spectra/ffsport.c @@ -28,6 +28,7 @@ #include #include #include +#include /**** Helper functions used for Div, Remainder operation on u64 ****/ diff --git a/drivers/staging/wlan-ng/cfg80211.c b/drivers/staging/wlan-ng/cfg80211.c index 368c30a..4af83d5 100644 --- a/drivers/staging/wlan-ng/cfg80211.c +++ b/drivers/staging/wlan-ng/cfg80211.c @@ -219,6 +219,7 @@ int prism2_get_key(struct wiphy *wiphy, struct net_device *dev, return -ENOENT; params.key_len = len; params.key = wlandev->wep_keys[key_index]; + params.seq_len = 0; callback(cookie, ¶ms); @@ -735,6 +736,8 @@ struct wiphy *wlan_create_wiphy(struct device *dev, wlandevice_t *wlandev) priv->band.n_channels = ARRAY_SIZE(prism2_channels); priv->band.bitrates = priv->rates; priv->band.n_bitrates = ARRAY_SIZE(prism2_rates); + priv->band.band = IEEE80211_BAND_2GHZ; + priv->band.ht_cap.ht_supported = false; wiphy->bands[IEEE80211_BAND_2GHZ] = &priv->band; set_wiphy_dev(wiphy, dev); diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 77d4d71..722c840 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -769,6 +769,7 @@ static int __init zram_init(void) free_devices: while (dev_id) destroy_device(&devices[--dev_id]); + kfree(devices); unregister: unregister_blkdev(zram_major, "zram"); out: diff --git a/drivers/usb/atm/cxacru.c b/drivers/usb/atm/cxacru.c index 593fc5e..5af23cc 100644 --- a/drivers/usb/atm/cxacru.c +++ b/drivers/usb/atm/cxacru.c @@ -1127,6 +1127,7 @@ static int cxacru_bind(struct usbatm_data *usbatm_instance, { struct cxacru_data *instance; struct usb_device *usb_dev = interface_to_usbdev(intf); + struct usb_host_endpoint *cmd_ep = usb_dev->ep_in[CXACRU_EP_CMD]; int ret; /* instance init */ @@ -1171,15 +1172,34 @@ static int cxacru_bind(struct usbatm_data *usbatm_instance, goto fail; } - usb_fill_int_urb(instance->rcv_urb, + if (!cmd_ep) { + dbg("cxacru_bind: no command endpoint"); + ret = -ENODEV; + goto fail; + } + + if ((cmd_ep->desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) + == USB_ENDPOINT_XFER_INT) { + usb_fill_int_urb(instance->rcv_urb, usb_dev, usb_rcvintpipe(usb_dev, CXACRU_EP_CMD), instance->rcv_buf, PAGE_SIZE, cxacru_blocking_completion, &instance->rcv_done, 1); - usb_fill_int_urb(instance->snd_urb, + usb_fill_int_urb(instance->snd_urb, usb_dev, usb_sndintpipe(usb_dev, CXACRU_EP_CMD), instance->snd_buf, PAGE_SIZE, cxacru_blocking_completion, &instance->snd_done, 4); + } else { + usb_fill_bulk_urb(instance->rcv_urb, + usb_dev, usb_rcvbulkpipe(usb_dev, CXACRU_EP_CMD), + instance->rcv_buf, PAGE_SIZE, + cxacru_blocking_completion, &instance->rcv_done); + + usb_fill_bulk_urb(instance->snd_urb, + usb_dev, usb_sndbulkpipe(usb_dev, CXACRU_EP_CMD), + instance->snd_buf, PAGE_SIZE, + cxacru_blocking_completion, &instance->snd_done); + } mutex_init(&instance->cm_serialize); diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 1833b3a..bc62fae 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -965,7 +965,8 
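The cxacru hunk above stops assuming the command endpoint is interrupt-type and instead dispatches on the endpoint descriptor. Condensed to a sketch, with the URB, buffer and completion callback as placeholders:

        struct usb_host_endpoint *ep = usb_dev->ep_in[CXACRU_EP_CMD];

        if (!ep)
                return -ENODEV;         /* no command endpoint at all */

        if ((ep->desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) ==
            USB_ENDPOINT_XFER_INT)
                usb_fill_int_urb(urb, usb_dev,
                                 usb_rcvintpipe(usb_dev, CXACRU_EP_CMD),
                                 buf, PAGE_SIZE, complete_fn, ctx, 1);
        else
                usb_fill_bulk_urb(urb, usb_dev,
                                  usb_rcvbulkpipe(usb_dev, CXACRU_EP_CMD),
                                  buf, PAGE_SIZE, complete_fn, ctx);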
@@ static int acm_probe(struct usb_interface *intf, } if (!buflen) { - if (intf->cur_altsetting->endpoint->extralen && + if (intf->cur_altsetting->endpoint && + intf->cur_altsetting->endpoint->extralen && intf->cur_altsetting->endpoint->extra) { dev_dbg(&intf->dev, "Seeking extra descriptors on endpoint\n"); @@ -1481,6 +1482,11 @@ static int acm_reset_resume(struct usb_interface *intf) USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, \ USB_CDC_ACM_PROTO_VENDOR) +#define SAMSUNG_PCSUITE_ACM_INFO(x) \ + USB_DEVICE_AND_INTERFACE_INFO(0x04e7, x, \ + USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, \ + USB_CDC_ACM_PROTO_VENDOR) + /* * USB driver structure. */ @@ -1591,6 +1597,17 @@ static const struct usb_device_id acm_ids[] = { { NOKIA_PCSUITE_ACM_INFO(0x0108), }, /* Nokia 5320 XpressMusic 2G */ { NOKIA_PCSUITE_ACM_INFO(0x01f5), }, /* Nokia N97, RM-505 */ { NOKIA_PCSUITE_ACM_INFO(0x02e3), }, /* Nokia 5230, RM-588 */ + { NOKIA_PCSUITE_ACM_INFO(0x0178), }, /* Nokia E63 */ + { NOKIA_PCSUITE_ACM_INFO(0x010e), }, /* Nokia E75 */ + { NOKIA_PCSUITE_ACM_INFO(0x02d9), }, /* Nokia 6760 Slide */ + { NOKIA_PCSUITE_ACM_INFO(0x01d0), }, /* Nokia E52 */ + { NOKIA_PCSUITE_ACM_INFO(0x0223), }, /* Nokia E72 */ + { NOKIA_PCSUITE_ACM_INFO(0x0275), }, /* Nokia X6 */ + { NOKIA_PCSUITE_ACM_INFO(0x026c), }, /* Nokia N97 Mini */ + { NOKIA_PCSUITE_ACM_INFO(0x0154), }, /* Nokia 5800 XpressMusic */ + { NOKIA_PCSUITE_ACM_INFO(0x04ce), }, /* Nokia E90 */ + { NOKIA_PCSUITE_ACM_INFO(0x01d4), }, /* Nokia E55 */ + { SAMSUNG_PCSUITE_ACM_INFO(0x6651), }, /* Samsung GTi8510 (INNOV8) */ /* NOTE: non-Nokia COMM/ACM/0xff is likely MSFT RNDIS... NOT a modem! */ @@ -1599,6 +1616,10 @@ static const struct usb_device_id acm_ids[] = { .driver_info = NOT_A_MODEM, }, + /* control interfaces without any protocol set */ + { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, + USB_CDC_PROTO_NONE) }, + /* control interfaces with various AT-command sets */ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_ACM_PROTO_AT_V25TER) }, diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index fd4c36e..844683e 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -1724,6 +1724,15 @@ free_interfaces: if (ret) goto free_interfaces; + /* if it's already configured, clear out old state first. + * getting rid of old interfaces means unbinding their drivers. + */ + if (dev->state != USB_STATE_ADDRESS) + usb_disable_device(dev, 1); /* Skip ep0 */ + + /* Get rid of pending async Set-Config requests for this device */ + cancel_async_set_config(dev); + /* Make sure we have bandwidth (and available HCD resources) for this * configuration. Remove endpoints from the schedule if we're dropping * this configuration to set configuration 0. After this point, the @@ -1733,20 +1742,11 @@ free_interfaces: mutex_lock(&hcd->bandwidth_mutex); ret = usb_hcd_alloc_bandwidth(dev, cp, NULL, NULL); if (ret < 0) { - usb_autosuspend_device(dev); mutex_unlock(&hcd->bandwidth_mutex); + usb_autosuspend_device(dev); goto free_interfaces; } - /* if it's already configured, clear out old state first. - * getting rid of old interfaces means unbinding their drivers. 
- */ - if (dev->state != USB_STATE_ADDRESS) - usb_disable_device(dev, 1); /* Skip ep0 */ - - /* Get rid of pending async Set-Config requests for this device */ - cancel_async_set_config(dev); - ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), USB_REQ_SET_CONFIGURATION, 0, configuration, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); @@ -1761,8 +1761,8 @@ free_interfaces: if (!cp) { usb_set_device_state(dev, USB_STATE_ADDRESS); usb_hcd_alloc_bandwidth(dev, NULL, NULL, NULL); - usb_autosuspend_device(dev); mutex_unlock(&hcd->bandwidth_mutex); + usb_autosuspend_device(dev); goto free_interfaces; } mutex_unlock(&hcd->bandwidth_mutex); diff --git a/drivers/usb/gadget/rndis.c b/drivers/usb/gadget/rndis.c index 020fa5a..972d5dd 100644 --- a/drivers/usb/gadget/rndis.c +++ b/drivers/usb/gadget/rndis.c @@ -293,9 +293,13 @@ gen_ndis_query_resp (int configNr, u32 OID, u8 *buf, unsigned buf_len, /* mandatory */ case OID_GEN_VENDOR_DESCRIPTION: pr_debug("%s: OID_GEN_VENDOR_DESCRIPTION\n", __func__); - length = strlen (rndis_per_dev_params [configNr].vendorDescr); - memcpy (outbuf, - rndis_per_dev_params [configNr].vendorDescr, length); + if ( rndis_per_dev_params [configNr].vendorDescr ) { + length = strlen (rndis_per_dev_params [configNr].vendorDescr); + memcpy (outbuf, + rndis_per_dev_params [configNr].vendorDescr, length); + } else { + outbuf[0] = 0; + } retval = 0; break; @@ -1148,7 +1152,7 @@ static struct proc_dir_entry *rndis_connect_state [RNDIS_MAX_CONFIGS]; #endif /* CONFIG_USB_GADGET_DEBUG_FILES */ -int __init rndis_init (void) +int rndis_init(void) { u8 i; diff --git a/drivers/usb/gadget/rndis.h b/drivers/usb/gadget/rndis.h index c236aaa..907c330 100644 --- a/drivers/usb/gadget/rndis.h +++ b/drivers/usb/gadget/rndis.h @@ -262,7 +262,7 @@ int rndis_signal_disconnect (int configNr); int rndis_state (int configNr); extern void rndis_set_host_mac (int configNr, const u8 *addr); -int __devinit rndis_init (void); +int rndis_init(void); void rndis_exit (void); #endif /* _LINUX_RNDIS_H */ diff --git a/drivers/usb/gadget/s3c-hsotg.c b/drivers/usb/gadget/s3c-hsotg.c index 521ebed..a229744 100644 --- a/drivers/usb/gadget/s3c-hsotg.c +++ b/drivers/usb/gadget/s3c-hsotg.c @@ -12,8 +12,6 @@ * published by the Free Software Foundation. 
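The rndis query hunk above guards OID_GEN_VENDOR_DESCRIPTION against a NULL vendorDescr instead of calling strlen() on it. The defensive reply boils down to this shape (field names as in the hunk, surrounding buffer bookkeeping elided):

        if (params->vendorDescr) {
                length = strlen(params->vendorDescr);
                memcpy(outbuf, params->vendorDescr, length);
        } else {
                outbuf[0] = 0;          /* reply with an empty string */
        }
        retval = 0;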
*/ -#define DEBUG - #include #include #include diff --git a/drivers/usb/host/ehci-ppc-of.c b/drivers/usb/host/ehci-ppc-of.c index 335ee69..ba52be4 100644 --- a/drivers/usb/host/ehci-ppc-of.c +++ b/drivers/usb/host/ehci-ppc-of.c @@ -192,17 +192,19 @@ ehci_hcd_ppc_of_probe(struct platform_device *op, const struct of_device_id *mat } rv = usb_add_hcd(hcd, irq, 0); - if (rv == 0) - return 0; + if (rv) + goto err_ehci; + + return 0; +err_ehci: + if (ehci->has_amcc_usb23) + iounmap(ehci->ohci_hcctrl_reg); iounmap(hcd->regs); err_ioremap: irq_dispose_mapping(irq); err_irq: release_mem_region(hcd->rsrc_start, hcd->rsrc_len); - - if (ehci->has_amcc_usb23) - iounmap(ehci->ohci_hcctrl_reg); err_rmr: usb_put_hcd(hcd); diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 80bf833..4f1744c 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -56,6 +56,7 @@ static int debug; static const struct usb_device_id id_table[] = { { USB_DEVICE(0x0471, 0x066A) }, /* AKTAKOM ACE-1001 cable */ { USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ + { USB_DEVICE(0x0489, 0xE003) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ { USB_DEVICE(0x0745, 0x1000) }, /* CipherLab USB CCD Barcode Scanner 1000 */ { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */ { USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */ @@ -88,6 +89,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8149) }, /* West Mountain Radio Computerized Battery Analyzer */ { USB_DEVICE(0x10C4, 0x814A) }, /* West Mountain Radio RIGblaster P&P */ { USB_DEVICE(0x10C4, 0x814B) }, /* West Mountain Radio RIGtalk */ + { USB_DEVICE(0x10C4, 0x8156) }, /* B&G H3000 link cable */ { USB_DEVICE(0x10C4, 0x815E) }, /* Helicomm IP-Link 1220-DVM */ { USB_DEVICE(0x10C4, 0x818B) }, /* AVIT Research USB to TTL */ { USB_DEVICE(0x10C4, 0x819F) }, /* MJS USB Toslink Switcher */ @@ -109,6 +111,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x83A8) }, /* Amber Wireless AMB2560 */ { USB_DEVICE(0x10C4, 0x8411) }, /* Kyocera GPS Module */ { USB_DEVICE(0x10C4, 0x846E) }, /* BEI USB Sensor Interface (VCP) */ + { USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */ { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA71) }, /* Infinity GPS-MIC-1 Radio Monophone */ @@ -122,14 +125,14 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1555, 0x0004) }, /* Owen AC4 USB-RS485 Converter */ { USB_DEVICE(0x166A, 0x0303) }, /* Clipsal 5500PCU C-Bus USB interface */ { USB_DEVICE(0x16D6, 0x0001) }, /* Jablotron serial interface */ - { USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */ - { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */ - { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ - { USB_DEVICE(0x413C, 0x9500) }, /* DW700 GPS USB interface */ { USB_DEVICE(0x16DC, 0x0010) }, /* W-IE-NE-R Plein & Baus GmbH PL512 Power Supply */ { USB_DEVICE(0x16DC, 0x0011) }, /* W-IE-NE-R Plein & Baus GmbH RCM Remote Control for MARATON Power Supply */ { USB_DEVICE(0x16DC, 0x0012) }, /* W-IE-NE-R Plein & Baus GmbH MPOD Multi Channel Power Supply */ { USB_DEVICE(0x16DC, 0x0015) }, /* W-IE-NE-R Plein & Baus GmbH CML Control, Monitoring and Data Logger */ + { USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */ + { USB_DEVICE(0x1843, 
0x0200) }, /* Vaisala USB Instrument Cable */ + { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ + { USB_DEVICE(0x413C, 0x9500) }, /* DW700 GPS USB interface */ { } /* Terminating Entry */ }; diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index c792c96..97cc87d 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -753,6 +753,14 @@ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(FTDI_VID, SEGWAY_RMP200_PID) }, { USB_DEVICE(IONICS_VID, IONICS_PLUGCOMPUTER_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, + { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_24_MASTER_WING_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_PC_WING_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_USB_DMX_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_MIDI_TIMECODE_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_MINI_WING_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_MAXI_WING_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_MEDIA_WING_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_WING_PID) }, { }, /* Optional parameter entry */ { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 2e95857..15a4583 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -135,6 +135,18 @@ #define FTDI_NDI_AURORA_SCU_PID 0xDA74 /* NDI Aurora SCU */ /* + * ChamSys Limited (www.chamsys.co.uk) USB wing/interface product IDs + */ +#define FTDI_CHAMSYS_24_MASTER_WING_PID 0xDAF8 +#define FTDI_CHAMSYS_PC_WING_PID 0xDAF9 +#define FTDI_CHAMSYS_USB_DMX_PID 0xDAFA +#define FTDI_CHAMSYS_MIDI_TIMECODE_PID 0xDAFB +#define FTDI_CHAMSYS_MINI_WING_PID 0xDAFC +#define FTDI_CHAMSYS_MAXI_WING_PID 0xDAFD +#define FTDI_CHAMSYS_MEDIA_WING_PID 0xDAFE +#define FTDI_CHAMSYS_WING_PID 0xDAFF + +/* * Westrex International devices submitted by Cory Lee */ #define FTDI_WESTREX_MODEL_777_PID 0xDC00 /* Model 777 */ diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 585b7e6..1c9b6e9 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -119,16 +119,20 @@ * by making a change here, in moschip_port_id_table, and in * moschip_id_table_combined */ -#define USB_VENDOR_ID_BANDB 0x0856 -#define BANDB_DEVICE_ID_USO9ML2_2 0xAC22 -#define BANDB_DEVICE_ID_USO9ML2_4 0xAC24 -#define BANDB_DEVICE_ID_US9ML2_2 0xAC29 -#define BANDB_DEVICE_ID_US9ML2_4 0xAC30 -#define BANDB_DEVICE_ID_USPTL4_2 0xAC31 -#define BANDB_DEVICE_ID_USPTL4_4 0xAC32 -#define BANDB_DEVICE_ID_USOPTL4_2 0xAC42 -#define BANDB_DEVICE_ID_USOPTL4_4 0xAC44 -#define BANDB_DEVICE_ID_USOPTL2_4 0xAC24 +#define USB_VENDOR_ID_BANDB 0x0856 +#define BANDB_DEVICE_ID_USO9ML2_2 0xAC22 +#define BANDB_DEVICE_ID_USO9ML2_2P 0xBC00 +#define BANDB_DEVICE_ID_USO9ML2_4 0xAC24 +#define BANDB_DEVICE_ID_USO9ML2_4P 0xBC01 +#define BANDB_DEVICE_ID_US9ML2_2 0xAC29 +#define BANDB_DEVICE_ID_US9ML2_4 0xAC30 +#define BANDB_DEVICE_ID_USPTL4_2 0xAC31 +#define BANDB_DEVICE_ID_USPTL4_4 0xAC32 +#define BANDB_DEVICE_ID_USOPTL4_2 0xAC42 +#define BANDB_DEVICE_ID_USOPTL4_2P 0xBC02 +#define BANDB_DEVICE_ID_USOPTL4_4 0xAC44 +#define BANDB_DEVICE_ID_USOPTL4_4P 0xBC03 +#define BANDB_DEVICE_ID_USOPTL2_4 0xAC24 /* This driver also supports * ATEN UC2324 device using Moschip MCS7840 @@ -184,13 +188,17 @@ static const struct usb_device_id moschip_port_id_table[] = { {USB_DEVICE(USB_VENDOR_ID_MOSCHIP, MOSCHIP_DEVICE_ID_7840)}, {USB_DEVICE(USB_VENDOR_ID_MOSCHIP, MOSCHIP_DEVICE_ID_7820)}, {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USO9ML2_2)}, 
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USO9ML2_2P)}, {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USO9ML2_4)}, + {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USO9ML2_4P)}, {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_US9ML2_2)}, {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_US9ML2_4)}, {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USPTL4_2)}, {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USPTL4_4)}, {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_2)}, + {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_2P)}, {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_4)}, + {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_4P)}, {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL2_4)}, {USB_DEVICE(USB_VENDOR_ID_ATENINTL, ATENINTL_DEVICE_ID_UC2324)}, {USB_DEVICE(USB_VENDOR_ID_ATENINTL, ATENINTL_DEVICE_ID_UC2322)}, @@ -201,13 +209,17 @@ static const struct usb_device_id moschip_id_table_combined[] __devinitconst = { {USB_DEVICE(USB_VENDOR_ID_MOSCHIP, MOSCHIP_DEVICE_ID_7840)}, {USB_DEVICE(USB_VENDOR_ID_MOSCHIP, MOSCHIP_DEVICE_ID_7820)}, {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USO9ML2_2)}, + {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USO9ML2_2P)}, {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USO9ML2_4)}, + {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USO9ML2_4P)}, {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_US9ML2_2)}, {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_US9ML2_4)}, {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USPTL4_2)}, {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USPTL4_4)}, {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_2)}, + {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_2P)}, {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_4)}, + {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_4P)}, {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL2_4)}, {USB_DEVICE(USB_VENDOR_ID_ATENINTL, ATENINTL_DEVICE_ID_UC2324)}, {USB_DEVICE(USB_VENDOR_ID_ATENINTL, ATENINTL_DEVICE_ID_UC2322)}, diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index adcbdb9..c46911a 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -164,6 +164,14 @@ static void option_instat_callback(struct urb *urb); #define YISO_VENDOR_ID 0x0EAB #define YISO_PRODUCT_U893 0xC893 +/* + * NOVATEL WIRELESS PRODUCTS + * + * Note from Novatel Wireless: + * If your Novatel modem does not work on linux, don't + * change the option module, but check our website. If + * that does not help, contact ddeschepper@nvtl.com +*/ /* MERLIN EVDO PRODUCTS */ #define NOVATELWIRELESS_PRODUCT_V640 0x1100 #define NOVATELWIRELESS_PRODUCT_V620 0x1110 @@ -185,24 +193,39 @@ static void option_instat_callback(struct urb *urb); #define NOVATELWIRELESS_PRODUCT_EU730 0x2400 #define NOVATELWIRELESS_PRODUCT_EU740 0x2410 #define NOVATELWIRELESS_PRODUCT_EU870D 0x2420 - /* OVATION PRODUCTS */ #define NOVATELWIRELESS_PRODUCT_MC727 0x4100 #define NOVATELWIRELESS_PRODUCT_MC950D 0x4400 -#define NOVATELWIRELESS_PRODUCT_U727 0x5010 -#define NOVATELWIRELESS_PRODUCT_MC727_NEW 0x5100 -#define NOVATELWIRELESS_PRODUCT_MC760 0x6000 +/* + * Note from Novatel Wireless: + * All PID in the 5xxx range are currently reserved for + * auto-install CDROMs, and should not be added to this + * module. 
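The ftdi_sio, mos7840 and option hunks above all follow the same two-step convention for new hardware: give the product ID a symbolic name in the driver's IDs header, then add one matching entry per table. In sketch form, with a made-up PID standing in for a real device:

        /* ftdi_sio_ids.h: name the product ID */
        #define FTDI_EXAMPLE_WING_PID   0xDAF0  /* hypothetical PID */

        /* ftdi_sio.c: match on the shared FTDI vendor ID */
        { USB_DEVICE(FTDI_VID, FTDI_EXAMPLE_WING_PID) },

Keeping the name in one header and the entries in the tables is what lets a patch like the ChamSys one above stay purely additive.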
+ * + * #define NOVATELWIRELESS_PRODUCT_U727 0x5010 + * #define NOVATELWIRELESS_PRODUCT_MC727_NEW 0x5100 +*/ #define NOVATELWIRELESS_PRODUCT_OVMC760 0x6002 - -/* FUTURE NOVATEL PRODUCTS */ -#define NOVATELWIRELESS_PRODUCT_EVDO_HIGHSPEED 0X6001 -#define NOVATELWIRELESS_PRODUCT_HSPA_FULLSPEED 0X7000 -#define NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED 0X7001 -#define NOVATELWIRELESS_PRODUCT_EVDO_EMBEDDED_FULLSPEED 0X8000 -#define NOVATELWIRELESS_PRODUCT_EVDO_EMBEDDED_HIGHSPEED 0X8001 -#define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_FULLSPEED 0X9000 -#define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_HIGHSPEED 0X9001 -#define NOVATELWIRELESS_PRODUCT_GLOBAL 0XA001 +#define NOVATELWIRELESS_PRODUCT_MC780 0x6010 +#define NOVATELWIRELESS_PRODUCT_EVDO_FULLSPEED 0x6000 +#define NOVATELWIRELESS_PRODUCT_EVDO_HIGHSPEED 0x6001 +#define NOVATELWIRELESS_PRODUCT_HSPA_FULLSPEED 0x7000 +#define NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED 0x7001 +#define NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED3 0x7003 +#define NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED4 0x7004 +#define NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED5 0x7005 +#define NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED6 0x7006 +#define NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED7 0x7007 +#define NOVATELWIRELESS_PRODUCT_MC996D 0x7030 +#define NOVATELWIRELESS_PRODUCT_MF3470 0x7041 +#define NOVATELWIRELESS_PRODUCT_MC547 0x7042 +#define NOVATELWIRELESS_PRODUCT_EVDO_EMBEDDED_FULLSPEED 0x8000 +#define NOVATELWIRELESS_PRODUCT_EVDO_EMBEDDED_HIGHSPEED 0x8001 +#define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_FULLSPEED 0x9000 +#define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_HIGHSPEED 0x9001 +#define NOVATELWIRELESS_PRODUCT_G1 0xA001 +#define NOVATELWIRELESS_PRODUCT_G1_M 0xA002 +#define NOVATELWIRELESS_PRODUCT_G2 0xA010 /* AMOI PRODUCTS */ #define AMOI_VENDOR_ID 0x1614 @@ -490,36 +513,44 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3765, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_ETS1220, 0xff, 0xff, 0xff) }, { USB_DEVICE(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E14AC) }, - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V640) }, /* Novatel Merlin V640/XV620 */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V620) }, /* Novatel Merlin V620/S620 */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V740) }, /* Novatel Merlin EX720/V740/X720 */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V720) }, /* Novatel Merlin V720/S720/PC720 */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_U730) }, /* Novatel U730/U740 (VF version) */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_U740) }, /* Novatel U740 */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_U870) }, /* Novatel U870 */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_XU870) }, /* Novatel Merlin XU870 HSDPA/3G */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_X950D) }, /* Novatel X950D */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EV620) }, /* Novatel EV620/ES620 CDMA/EV-DO */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_ES720) }, /* Novatel ES620/ES720/U720/USB720 */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_E725) }, /* Novatel E725/E726 */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_ES620) }, /* Novatel Merlin ES620 SM Bus */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EU730) }, 
/* Novatel EU730 and Vodafone EU740 */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EU740) }, /* Novatel non-Vodafone EU740 */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EU870D) }, /* Novatel EU850D/EU860D/EU870D */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC950D) }, /* Novatel MC930D/MC950D */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC727) }, /* Novatel MC727/U727/USB727 */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC727_NEW) }, /* Novatel MC727/U727/USB727 refresh */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_U727) }, /* Novatel MC727/U727/USB727 */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC760) }, /* Novatel MC760/U760/USB760 */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_OVMC760) }, /* Novatel Ovation MC760 */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_FULLSPEED) }, /* Novatel HSPA product */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EVDO_EMBEDDED_FULLSPEED) }, /* Novatel EVDO Embedded product */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_FULLSPEED) }, /* Novatel HSPA Embedded product */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EVDO_HIGHSPEED) }, /* Novatel EVDO product */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED) }, /* Novatel HSPA product */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EVDO_EMBEDDED_HIGHSPEED) }, /* Novatel EVDO Embedded product */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_HIGHSPEED) }, /* Novatel HSPA Embedded product */ - { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_GLOBAL) }, /* Novatel Global product */ + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V640) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V620) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V740) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V720) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_U730) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_U740) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_U870) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_XU870) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_X950D) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EV620) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_ES720) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_E725) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_ES620) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EU730) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EU740) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EU870D) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC950D) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC727) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_OVMC760) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC780) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EVDO_FULLSPEED) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, 
NOVATELWIRELESS_PRODUCT_HSPA_FULLSPEED) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EVDO_EMBEDDED_FULLSPEED) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_FULLSPEED) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EVDO_HIGHSPEED) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED3) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED4) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED5) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED6) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_HIGHSPEED7) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC996D) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MF3470) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC547) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_EVDO_EMBEDDED_HIGHSPEED) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_HIGHSPEED) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_G1) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_G1_M) }, + { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_G2) }, { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01) }, { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01A) }, diff --git a/drivers/usb/serial/ssu100.c b/drivers/usb/serial/ssu100.c index 68c18fd..e986002 100644 --- a/drivers/usb/serial/ssu100.c +++ b/drivers/usb/serial/ssu100.c @@ -46,7 +46,7 @@ #define FULLPWRBIT 0x00000080 #define NEXT_BOARD_POWER_BIT 0x00000004 -static int debug = 1; +static int debug; /* Version Information */ #define DRIVER_VERSION "v0.1" diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index e05557d..c579dcc 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -60,22 +60,25 @@ static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync, return 0; } +static void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn) +{ + INIT_LIST_HEAD(&work->node); + work->fn = fn; + init_waitqueue_head(&work->done); + work->flushing = 0; + work->queue_seq = work->done_seq = 0; +} + /* Init poll structure */ void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, unsigned long mask, struct vhost_dev *dev) { - struct vhost_work *work = &poll->work; - init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup); init_poll_funcptr(&poll->table, vhost_poll_func); poll->mask = mask; poll->dev = dev; - INIT_LIST_HEAD(&work->node); - work->fn = fn; - init_waitqueue_head(&work->done); - work->flushing = 0; - work->queue_seq = work->done_seq = 0; + vhost_work_init(&poll->work, fn); } /* Start polling a file. We add ourselves to file's wait queue. The caller must @@ -95,35 +98,38 @@ void vhost_poll_stop(struct vhost_poll *poll) remove_wait_queue(poll->wqh, &poll->wait); } -/* Flush any work that has been scheduled. When calling this, don't hold any - * locks that are also used by the callback. 
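The vhost refactor above splits the work-item mechanics (init, queue, flush) out of the poll wrapper, so callers can run an arbitrary function on the vhost worker thread and wait for it synchronously; the cgroup-attach helper shown in full further down is the first user. The calling pattern, condensed from the patch itself:

        struct vhost_attach_cgroups_struct attach;

        attach.owner = current;
        vhost_work_init(&attach.work, vhost_attach_cgroups_work);
        vhost_work_queue(dev, &attach.work);    /* run on the worker thread */
        vhost_work_flush(dev, &attach.work);    /* wait until it has completed */
        return attach.ret;                      /* filled in by the work function */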
*/ -void vhost_poll_flush(struct vhost_poll *poll) +static void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work) { - struct vhost_work *work = &poll->work; unsigned seq; int left; int flushing; - spin_lock_irq(&poll->dev->work_lock); + spin_lock_irq(&dev->work_lock); seq = work->queue_seq; work->flushing++; - spin_unlock_irq(&poll->dev->work_lock); + spin_unlock_irq(&dev->work_lock); wait_event(work->done, ({ - spin_lock_irq(&poll->dev->work_lock); + spin_lock_irq(&dev->work_lock); left = seq - work->done_seq <= 0; - spin_unlock_irq(&poll->dev->work_lock); + spin_unlock_irq(&dev->work_lock); left; })); - spin_lock_irq(&poll->dev->work_lock); + spin_lock_irq(&dev->work_lock); flushing = --work->flushing; - spin_unlock_irq(&poll->dev->work_lock); + spin_unlock_irq(&dev->work_lock); BUG_ON(flushing < 0); } -void vhost_poll_queue(struct vhost_poll *poll) +/* Flush any work that has been scheduled. When calling this, don't hold any + * locks that are also used by the callback. */ +void vhost_poll_flush(struct vhost_poll *poll) +{ + vhost_work_flush(poll->dev, &poll->work); +} + +static inline void vhost_work_queue(struct vhost_dev *dev, + struct vhost_work *work) { - struct vhost_dev *dev = poll->dev; - struct vhost_work *work = &poll->work; unsigned long flags; spin_lock_irqsave(&dev->work_lock, flags); @@ -135,6 +141,11 @@ void vhost_poll_queue(struct vhost_poll *poll) spin_unlock_irqrestore(&dev->work_lock, flags); } +void vhost_poll_queue(struct vhost_poll *poll) +{ + vhost_work_queue(poll->dev, &poll->work); +} + static void vhost_vq_reset(struct vhost_dev *dev, struct vhost_virtqueue *vq) { @@ -236,6 +247,29 @@ long vhost_dev_check_owner(struct vhost_dev *dev) return dev->mm == current->mm ? 0 : -EPERM; } +struct vhost_attach_cgroups_struct { + struct vhost_work work; + struct task_struct *owner; + int ret; +}; + +static void vhost_attach_cgroups_work(struct vhost_work *work) +{ + struct vhost_attach_cgroups_struct *s; + s = container_of(work, struct vhost_attach_cgroups_struct, work); + s->ret = cgroup_attach_task_all(s->owner, current); +} + +static int vhost_attach_cgroups(struct vhost_dev *dev) +{ + struct vhost_attach_cgroups_struct attach; + attach.owner = current; + vhost_work_init(&attach.work, vhost_attach_cgroups_work); + vhost_work_queue(dev, &attach.work); + vhost_work_flush(dev, &attach.work); + return attach.ret; +} + /* Caller should have device mutex */ static long vhost_dev_set_owner(struct vhost_dev *dev) { @@ -255,14 +289,16 @@ static long vhost_dev_set_owner(struct vhost_dev *dev) } dev->worker = worker; - err = cgroup_attach_task_current_cg(worker); + wake_up_process(worker); /* avoid contributing to loadavg */ + + err = vhost_attach_cgroups(dev); if (err) goto err_cgroup; - wake_up_process(worker); /* avoid contributing to loadavg */ return 0; err_cgroup: kthread_stop(worker); + dev->worker = NULL; err_worker: if (dev->mm) mmput(dev->mm); @@ -323,7 +359,10 @@ void vhost_dev_cleanup(struct vhost_dev *dev) dev->mm = NULL; WARN_ON(!list_empty(&dev->work_list)); - kthread_stop(dev->worker); + if (dev->worker) { + kthread_stop(dev->worker); + dev->worker = NULL; + } } static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz) diff --git a/drivers/video/pxa168fb.c b/drivers/video/pxa168fb.c index c91a7f7..5d786bd 100644 --- a/drivers/video/pxa168fb.c +++ b/drivers/video/pxa168fb.c @@ -559,7 +559,7 @@ static struct fb_ops pxa168fb_ops = { .fb_imageblit = cfb_imageblit, }; -static int __init pxa168fb_init_mode(struct fb_info *info, +static 
int __devinit pxa168fb_init_mode(struct fb_info *info, struct pxa168fb_mach_info *mi) { struct pxa168fb_info *fbi = info->par; @@ -599,7 +599,7 @@ static int __init pxa168fb_init_mode(struct fb_info *info, return ret; } -static int __init pxa168fb_probe(struct platform_device *pdev) +static int __devinit pxa168fb_probe(struct platform_device *pdev) { struct pxa168fb_mach_info *mi; struct fb_info *info = 0; @@ -792,7 +792,7 @@ static struct platform_driver pxa168fb_driver = { .probe = pxa168fb_probe, }; -static int __devinit pxa168fb_init(void) +static int __init pxa168fb_init(void) { return platform_driver_register(&pxa168fb_driver); } diff --git a/drivers/video/via/ioctl.c b/drivers/video/via/ioctl.c index da03c07..4d553d0 100644 --- a/drivers/video/via/ioctl.c +++ b/drivers/video/via/ioctl.c @@ -25,6 +25,8 @@ int viafb_ioctl_get_viafb_info(u_long arg) { struct viafb_ioctl_info viainfo; + memset(&viainfo, 0, sizeof(struct viafb_ioctl_info)); + viainfo.viafb_id = VIAID; viainfo.vendor_id = PCI_VIA_VENDOR_ID; diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index b036677..24efd8e 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -213,11 +213,11 @@ config OMAP_WATCHDOG here to enable the OMAP1610/OMAP1710/OMAP2420/OMAP3430/OMAP4430 watchdog timer. config PNX4008_WATCHDOG - tristate "PNX4008 Watchdog" - depends on ARCH_PNX4008 + tristate "PNX4008 and LPC32XX Watchdog" + depends on ARCH_PNX4008 || ARCH_LPC32XX help Say Y here if to include support for the watchdog timer - in the PNX4008 processor. + in the PNX4008 or LPC32XX processor. This driver can be built as a module by choosing M. The module will be called pnx4008_wdt. diff --git a/drivers/watchdog/sb_wdog.c b/drivers/watchdog/sb_wdog.c index 88c83aa..f31493e 100644 --- a/drivers/watchdog/sb_wdog.c +++ b/drivers/watchdog/sb_wdog.c @@ -305,7 +305,7 @@ static int __init sbwdog_init(void) if (ret) { printk(KERN_ERR "%s: failed to request irq 1 - %d\n", ident.identity, ret); - return ret; + goto out; } ret = misc_register(&sbwdog_miscdev); @@ -313,14 +313,20 @@ static int __init sbwdog_init(void) printk(KERN_INFO "%s: timeout is %ld.%ld secs\n", ident.identity, timeout / 1000000, (timeout / 100000) % 10); - } else - free_irq(1, (void *)user_dog); + return 0; + } + free_irq(1, (void *)user_dog); +out: + unregister_reboot_notifier(&sbwdog_notifier); + return ret; } static void __exit sbwdog_exit(void) { misc_deregister(&sbwdog_miscdev); + free_irq(1, (void *)user_dog); + unregister_reboot_notifier(&sbwdog_notifier); } module_init(sbwdog_init); diff --git a/drivers/watchdog/ts72xx_wdt.c b/drivers/watchdog/ts72xx_wdt.c index 458c499..18cdeb4 100644 --- a/drivers/watchdog/ts72xx_wdt.c +++ b/drivers/watchdog/ts72xx_wdt.c @@ -449,6 +449,9 @@ static __devinit int ts72xx_wdt_probe(struct platform_device *pdev) wdt->pdev = pdev; mutex_init(&wdt->lock); + /* make sure that the watchdog is disabled */ + ts72xx_wdt_stop(wdt); + error = misc_register(&ts72xx_wdt_miscdev); if (error) { dev_err(&pdev->dev, "failed to register miscdev\n"); diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 3585636..6406f89 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -242,7 +242,8 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry) } kfree(wnames); fid_out: - v9fs_fid_add(dentry, fid); + if (!IS_ERR(fid)) + v9fs_fid_add(dentry, fid); err_out: up_read(&v9ses->rename_sem); return fid; diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 16c8a2a..899f168 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -292,9 +292,11 @@ int 
v9fs_dir_release(struct inode *inode, struct file *filp) fid = filp->private_data; P9_DPRINTK(P9_DEBUG_VFS, - "inode: %p filp: %p fid: %d\n", inode, filp, fid->fid); + "v9fs_dir_release: inode: %p filp: %p fid: %d\n", + inode, filp, fid ? fid->fid : -1); filemap_write_and_wait(inode->i_mapping); - p9_client_clunk(fid); + if (fid) + p9_client_clunk(fid); return 0; } diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index c7c23ea..9e670d5 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -730,7 +730,10 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode, P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); goto error; } - dentry->d_op = &v9fs_cached_dentry_operations; + if (v9ses->cache) + dentry->d_op = &v9fs_cached_dentry_operations; + else + dentry->d_op = &v9fs_dentry_operations; d_instantiate(dentry, inode); err = v9fs_fid_add(dentry, fid); if (err < 0) @@ -1128,6 +1131,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb); generic_fillattr(dentry->d_inode, stat); + p9stat_free(st); kfree(st); return 0; } @@ -1489,6 +1493,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) retval = strnlen(buffer, buflen); done: + p9stat_free(st); kfree(st); return retval; } @@ -1942,7 +1947,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = { .unlink = v9fs_vfs_unlink, .mkdir = v9fs_vfs_mkdir, .rmdir = v9fs_vfs_rmdir, - .mknod = v9fs_vfs_mknod_dotl, + .mknod = v9fs_vfs_mknod, .rename = v9fs_vfs_rename, .getattr = v9fs_vfs_getattr, .setattr = v9fs_vfs_setattr, diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index f931107..1d12ba0 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -122,6 +122,10 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, fid = v9fs_session_init(v9ses, dev_name, data); if (IS_ERR(fid)) { retval = PTR_ERR(fid); + /* + * we need to call session_close to tear down some + * of the data structure setup by session_init + */ goto close_session; } @@ -144,7 +148,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, retval = -ENOMEM; goto release_sb; } - sb->s_root = root; if (v9fs_proto_dotl(v9ses)) { @@ -152,7 +155,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); if (IS_ERR(st)) { retval = PTR_ERR(st); - goto clunk_fid; + goto release_sb; } v9fs_stat2inode_dotl(st, root->d_inode); @@ -162,7 +165,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, st = p9_client_stat(fid); if (IS_ERR(st)) { retval = PTR_ERR(st); - goto clunk_fid; + goto release_sb; } root->d_inode->i_ino = v9fs_qid2ino(&st->qid); @@ -174,19 +177,24 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, v9fs_fid_add(root, fid); -P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); + P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); simple_set_mnt(mnt, sb); return 0; clunk_fid: p9_client_clunk(fid); - close_session: v9fs_session_close(v9ses); kfree(v9ses); return retval; - release_sb: + /* + * we will do the session_close and root dentry release + * in the below call. But we need to clunk fid, because we haven't + * attached the fid to dentry so it won't get clunked + * automatically. 
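The 9p hunks above tighten the usual ERR_PTR discipline: a lookup that can fail returns ERR_PTR(-errno), and the caller must test with IS_ERR() before dereferencing or registering the result. A minimal sketch using the functions that appear in the hunks:

        struct p9_fid *fid;

        fid = v9fs_fid_lookup(dentry);  /* may return ERR_PTR(-errno) */
        if (IS_ERR(fid))
                return PTR_ERR(fid);    /* never dereference on failure */
        /* ... use the fid, then clunk it when done with it ... */
        p9_client_clunk(fid);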
+ */ + p9_client_clunk(fid); deactivate_locked_super(sb); return retval; } diff --git a/fs/aio.c b/fs/aio.c index 3006b5b..1320b2a 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1659,6 +1659,9 @@ long do_io_submit(aio_context_t ctx_id, long nr, if (unlikely(nr < 0)) return -EINVAL; + if (unlikely(nr > LONG_MAX/sizeof(*iocbpp))) + nr = LONG_MAX/sizeof(*iocbpp); + if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp))))) return -EFAULT; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index a7528b9..fd0cc0b 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -724,7 +724,7 @@ static int __init init_misc_binfmt(void) { int err = register_filesystem(&bm_fs_type); if (!err) { - err = register_binfmt(&misc_format); + err = insert_binfmt(&misc_format); if (err) unregister_filesystem(&bm_fs_type); } diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 612a5c3..4d0ff5e 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -413,10 +413,10 @@ int bio_integrity_prep(struct bio *bio) /* Allocate kernel buffer for protection data */ len = sectors * blk_integrity_tuple_size(bi); - buf = kmalloc(len, GFP_NOIO | __GFP_NOFAIL | q->bounce_gfp); + buf = kmalloc(len, GFP_NOIO | q->bounce_gfp); if (unlikely(buf == NULL)) { printk(KERN_ERR "could not allocate integrity buffer\n"); - return -EIO; + return -ENOMEM; } end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT; diff --git a/fs/block_dev.c b/fs/block_dev.c index 50e8c85..b737451 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -370,7 +370,7 @@ int blkdev_fsync(struct file *filp, int datasync) */ mutex_unlock(&bd_inode->i_mutex); - error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT); + error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL); if (error == -EOPNOTSUPP) error = 0; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 64f1008..5e789f4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2063,7 +2063,7 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) if (uptodate) { set_buffer_uptodate(bh); } else { - if (!buffer_eopnotsupp(bh) && printk_ratelimit()) { + if (printk_ratelimit()) { printk(KERN_WARNING "lost page write due to " "I/O error on %s\n", bdevname(bh->b_bdev, b)); @@ -2200,21 +2200,10 @@ static int write_dev_supers(struct btrfs_device *device, bh->b_end_io = btrfs_end_buffer_write_sync; } - if (i == last_barrier && do_barriers && device->barriers) { - ret = submit_bh(WRITE_BARRIER, bh); - if (ret == -EOPNOTSUPP) { - printk("btrfs: disabling barriers on dev %s\n", - device->name); - set_buffer_uptodate(bh); - device->barriers = 0; - /* one reference for submit_bh */ - get_bh(bh); - lock_buffer(bh); - ret = submit_bh(WRITE_SYNC, bh); - } - } else { + if (i == last_barrier && do_barriers) + ret = submit_bh(WRITE_FLUSH_FUA, bh); + else ret = submit_bh(WRITE_SYNC, bh); - } if (ret) errors++; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 32d0940..0b81ecd 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1695,8 +1695,7 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, static void btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len) { - blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, - BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); + blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0); } static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index dd318ff..e25e46a 100644 --- a/fs/btrfs/volumes.c +++ 
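The do_io_submit() guard above exists because nr later feeds the byte count nr * sizeof(*iocbpp): without a cap the multiplication can wrap on 32-bit and access_ok() would approve a bogus range. The shape of the check as a standalone sketch (hypothetical helper name):

	#include <linux/kernel.h>

	static long clamp_user_count(long nr, size_t elem_size)
	{
		if (nr < 0)
			return -EINVAL;
		/* keep nr * elem_size representable in a long */
		if (nr > LONG_MAX / elem_size)
			nr = LONG_MAX / elem_size;
		return nr;
	}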
b/fs/btrfs/volumes.c @@ -398,7 +398,6 @@ static noinline int device_list_add(const char *path, device->work.func = pending_bios_fn; memcpy(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE); - device->barriers = 1; spin_lock_init(&device->io_lock); device->name = kstrdup(path, GFP_NOFS); if (!device->name) { @@ -462,7 +461,6 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) device->devid = orig_dev->devid; device->work.func = pending_bios_fn; memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid)); - device->barriers = 1; spin_lock_init(&device->io_lock); INIT_LIST_HEAD(&device->dev_list); INIT_LIST_HEAD(&device->dev_alloc_list); @@ -1489,7 +1487,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) trans = btrfs_start_transaction(root, 0); lock_chunks(root); - device->barriers = 1; device->writeable = 1; device->work.func = pending_bios_fn; generate_random_uuid(device->uuid); @@ -3084,7 +3081,6 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root, return NULL; list_add(&device->dev_list, &fs_devices->devices); - device->barriers = 1; device->dev_root = root->fs_info->dev_root; device->devid = devid; device->work.func = pending_bios_fn; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 31b0fab..2b638b6 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -42,7 +42,6 @@ struct btrfs_device { int running_pending; u64 generation; - int barriers; int writeable; int in_fs_metadata; diff --git a/fs/buffer.c b/fs/buffer.c index 3e7dca2..7f0b9b0 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -156,7 +156,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate) if (uptodate) { set_buffer_uptodate(bh); } else { - if (!buffer_eopnotsupp(bh) && !quiet_error(bh)) { + if (!quiet_error(bh)) { buffer_io_error(bh); printk(KERN_WARNING "lost page write due to " "I/O error on %s\n", @@ -2891,7 +2891,6 @@ static void end_bio_bh_io_sync(struct bio *bio, int err) if (err == -EOPNOTSUPP) { set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); - set_bit(BH_Eopnotsupp, &bh->b_state); } if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags))) @@ -3031,10 +3030,6 @@ int __sync_dirty_buffer(struct buffer_head *bh, int rw) bh->b_end_io = end_buffer_write_sync; ret = submit_bh(rw, bh); wait_on_buffer(bh); - if (buffer_eopnotsupp(bh)) { - clear_buffer_eopnotsupp(bh); - ret = -EOPNOTSUPP; - } if (!ret && !buffer_uptodate(bh)) ret = -EIO; } else { diff --git a/fs/char_dev.c b/fs/char_dev.c index f80a4f2..143d393 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -40,7 +40,9 @@ struct backing_dev_info directly_mappable_cdev_bdi = { #endif /* permit direct mmap, for read, write or exec */ BDI_CAP_MAP_DIRECT | - BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP), + BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP | + /* no writeback happens */ + BDI_CAP_NO_ACCT_AND_WRITEBACK), }; static struct kobj_map *cdev_map; diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 0da1deb..917b7d4 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -2,8 +2,6 @@ config CIFS tristate "CIFS support (advanced network filesystem, SMBFS successor)" depends on INET select NLS - select CRYPTO_MD5 - select CRYPTO_ARC4 help This is the client VFS module for the Common Internet File System (CIFS) protocol which is the successor to the Server Message Block diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index 21f0fbd..cfd1ce3 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c @@ -597,13 +597,13 @@ decode_negTokenInit(unsigned char *security_blob, int 
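A theme running through this series: the old barrier machinery (per-device ->barriers flags, BH_Eopnotsupp, retry-without-barrier loops) is gone, replaced by requests tagged with flush/FUA semantics that the block layer adapts to each queue. Where a caller only needs the cache flushed, the post-conversion idiom is the three-argument blkdev_issue_flush(), as in the blkdev_fsync() hunk; a minimal sketch:

	#include <linux/blkdev.h>

	static int example_flush(struct block_device *bdev)
	{
		int err = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);

		/* a queue without a volatile write cache has nothing to
		 * flush; for ordering purposes that counts as success */
		if (err == -EOPNOTSUPP)
			err = 0;
		return err;
	}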
length, if (compare_oid(oid, oidlen, MSKRB5_OID, MSKRB5_OID_LEN)) server->sec_mskerberos = true; - if (compare_oid(oid, oidlen, KRB5U2U_OID, + else if (compare_oid(oid, oidlen, KRB5U2U_OID, KRB5U2U_OID_LEN)) server->sec_kerberosu2u = true; - if (compare_oid(oid, oidlen, KRB5_OID, + else if (compare_oid(oid, oidlen, KRB5_OID, KRB5_OID_LEN)) server->sec_kerberos = true; - if (compare_oid(oid, oidlen, NTLMSSP_OID, + else if (compare_oid(oid, oidlen, NTLMSSP_OID, NTLMSSP_OID_LEN)) server->sec_ntlmssp = true; diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 709f229..35042d8 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -27,7 +27,6 @@ #include "md5.h" #include "cifs_unicode.h" #include "cifsproto.h" -#include "ntlmssp.h" #include #include @@ -43,43 +42,21 @@ extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24); static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu, - struct TCP_Server_Info *server, char *signature) + const struct mac_key *key, char *signature) { - int rc; + struct MD5Context context; - if (cifs_pdu == NULL || server == NULL || signature == NULL) + if ((cifs_pdu == NULL) || (signature == NULL) || (key == NULL)) return -EINVAL; - if (!server->ntlmssp.sdescmd5) { - cERROR(1, - "cifs_calculate_signature: can't generate signature\n"); - return -1; - } - - rc = crypto_shash_init(&server->ntlmssp.sdescmd5->shash); - if (rc) { - cERROR(1, "cifs_calculate_signature: oould not init md5\n"); - return rc; - } - - if (server->secType == RawNTLMSSP) - crypto_shash_update(&server->ntlmssp.sdescmd5->shash, - server->session_key.data.ntlmv2.key, - CIFS_NTLMV2_SESSKEY_SIZE); - else - crypto_shash_update(&server->ntlmssp.sdescmd5->shash, - (char *)&server->session_key.data, - server->session_key.len); - - crypto_shash_update(&server->ntlmssp.sdescmd5->shash, - cifs_pdu->Protocol, cifs_pdu->smb_buf_length); + cifs_MD5_init(&context); + cifs_MD5_update(&context, (char *)&key->data, key->len); + cifs_MD5_update(&context, cifs_pdu->Protocol, cifs_pdu->smb_buf_length); - rc = crypto_shash_final(&server->ntlmssp.sdescmd5->shash, signature); - - return rc; + cifs_MD5_final(signature, &context); + return 0; } - int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, __u32 *pexpected_response_sequence_number) { @@ -101,7 +78,8 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, server->sequence_number++; spin_unlock(&GlobalMid_Lock); - rc = cifs_calculate_signature(cifs_pdu, server, smb_signature); + rc = cifs_calculate_signature(cifs_pdu, &server->mac_signing_key, + smb_signature); if (rc) memset(cifs_pdu->Signature.SecuritySignature, 0, 8); else @@ -111,39 +89,21 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, } static int cifs_calc_signature2(const struct kvec *iov, int n_vec, - struct TCP_Server_Info *server, char *signature) + const struct mac_key *key, char *signature) { + struct MD5Context context; int i; - int rc; - if (iov == NULL || server == NULL || signature == NULL) + if ((iov == NULL) || (signature == NULL) || (key == NULL)) return -EINVAL; - if (!server->ntlmssp.sdescmd5) { - cERROR(1, "cifs_calc_signature2: can't generate signature\n"); - return -1; - } - - rc = crypto_shash_init(&server->ntlmssp.sdescmd5->shash); - if (rc) { - cERROR(1, "cifs_calc_signature2: oould not init md5\n"); - return rc; - } - - if (server->secType == RawNTLMSSP) - crypto_shash_update(&server->ntlmssp.sdescmd5->shash, - 
server->session_key.data.ntlmv2.key, - CIFS_NTLMV2_SESSKEY_SIZE); - else - crypto_shash_update(&server->ntlmssp.sdescmd5->shash, - (char *)&server->session_key.data, - server->session_key.len); - + cifs_MD5_init(&context); + cifs_MD5_update(&context, (char *)&key->data, key->len); for (i = 0; i < n_vec; i++) { if (iov[i].iov_len == 0) continue; if (iov[i].iov_base == NULL) { - cERROR(1, "cifs_calc_signature2: null iovec entry"); + cERROR(1, "null iovec entry"); return -EIO; } /* The first entry includes a length field (which does not get @@ -151,18 +111,18 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec, if (i == 0) { if (iov[0].iov_len <= 8) /* cmd field at offset 9 */ break; /* nothing to sign or corrupt header */ - crypto_shash_update(&server->ntlmssp.sdescmd5->shash, - iov[i].iov_base + 4, iov[i].iov_len - 4); + cifs_MD5_update(&context, iov[0].iov_base+4, + iov[0].iov_len-4); } else - crypto_shash_update(&server->ntlmssp.sdescmd5->shash, - iov[i].iov_base, iov[i].iov_len); + cifs_MD5_update(&context, iov[i].iov_base, iov[i].iov_len); } - rc = crypto_shash_final(&server->ntlmssp.sdescmd5->shash, signature); + cifs_MD5_final(signature, &context); - return rc; + return 0; } + int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, __u32 *pexpected_response_sequence_number) { @@ -185,7 +145,8 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, server->sequence_number++; spin_unlock(&GlobalMid_Lock); - rc = cifs_calc_signature2(iov, n_vec, server, smb_signature); + rc = cifs_calc_signature2(iov, n_vec, &server->mac_signing_key, + smb_signature); if (rc) memset(cifs_pdu->Signature.SecuritySignature, 0, 8); else @@ -195,14 +156,14 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, } int cifs_verify_signature(struct smb_hdr *cifs_pdu, - struct TCP_Server_Info *server, + const struct mac_key *mac_key, __u32 expected_sequence_number) { - int rc; + unsigned int rc; char server_response_sig[8]; char what_we_think_sig_should_be[20]; - if (cifs_pdu == NULL || server == NULL) + if ((cifs_pdu == NULL) || (mac_key == NULL)) return -EINVAL; if (cifs_pdu->Command == SMB_COM_NEGOTIATE) @@ -231,7 +192,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu, cpu_to_le32(expected_sequence_number); cifs_pdu->Signature.Sequence.Reserved = 0; - rc = cifs_calculate_signature(cifs_pdu, server, + rc = cifs_calculate_signature(cifs_pdu, mac_key, what_we_think_sig_should_be); if (rc) @@ -248,7 +209,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu, } /* We fill in key by putting in 40 byte array which was allocated by caller */ -int cifs_calculate_session_key(struct session_key *key, const char *rn, +int cifs_calculate_mac_key(struct mac_key *key, const char *rn, const char *password) { char temp_key[16]; @@ -306,52 +267,38 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses, { int rc = 0; int len; - char nt_hash[CIFS_NTHASH_SIZE]; + char nt_hash[16]; + struct HMACMD5Context *pctxt; wchar_t *user; wchar_t *domain; - wchar_t *server; - if (!ses->server->ntlmssp.sdeschmacmd5) { - cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n"); - return -1; - } + pctxt = kmalloc(sizeof(struct HMACMD5Context), GFP_KERNEL); + + if (pctxt == NULL) + return -ENOMEM; /* calculate md4 hash of password */ E_md4hash(ses->password, nt_hash); - crypto_shash_setkey(ses->server->ntlmssp.hmacmd5, nt_hash, - CIFS_NTHASH_SIZE); - - rc = crypto_shash_init(&ses->server->ntlmssp.sdeschmacmd5->shash); - if (rc) { - cERROR(1, 
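cifs_calc_signature2() walks a scattered PDU; the one subtlety is that the 4-byte RFC1002 length prefix at the head of the first iovec is excluded from the MAC. The loop's skeleton, with a hypothetical hash_update() standing in for the driver's MD5 helpers:

	for (i = 0; i < n_vec; i++) {
		if (iov[i].iov_len == 0)
			continue;
		if (i == 0) {
			if (iov[0].iov_len <= 8)
				break;	/* nothing past the header to sign */
			hash_update(ctx, iov[0].iov_base + 4,
				    iov[0].iov_len - 4);
		} else {
			hash_update(ctx, iov[i].iov_base, iov[i].iov_len);
		}
	}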
"calc_ntlmv2_hash: could not init hmacmd5\n"); - return rc; - } + /* convert Domainname to unicode and uppercase */ + hmac_md5_init_limK_to_64(nt_hash, 16, pctxt); /* convert ses->userName to unicode and uppercase */ len = strlen(ses->userName); user = kmalloc(2 + (len * 2), GFP_KERNEL); - if (user == NULL) { - cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n"); - rc = -ENOMEM; + if (user == NULL) goto calc_exit_2; - } len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp); UniStrupr(user); - - crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash, - (char *)user, 2 * len); + hmac_md5_update((char *)user, 2*len, pctxt); /* convert ses->domainName to unicode and uppercase */ if (ses->domainName) { len = strlen(ses->domainName); domain = kmalloc(2 + (len * 2), GFP_KERNEL); - if (domain == NULL) { - cERROR(1, "calc_ntlmv2_hash: domain mem alloc failure"); - rc = -ENOMEM; + if (domain == NULL) goto calc_exit_1; - } len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len, nls_cp); /* the following line was removed since it didn't work well @@ -359,292 +306,65 @@ static int calc_ntlmv2_hash(struct cifsSesInfo *ses, Maybe converting the domain name earlier makes sense */ /* UniStrupr(domain); */ - crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash, - (char *)domain, 2 * len); + hmac_md5_update((char *)domain, 2*len, pctxt); kfree(domain); - } else if (ses->serverName) { - len = strlen(ses->serverName); - - server = kmalloc(2 + (len * 2), GFP_KERNEL); - if (server == NULL) { - cERROR(1, "calc_ntlmv2_hash: server mem alloc failure"); - rc = -ENOMEM; - goto calc_exit_1; - } - len = cifs_strtoUCS((__le16 *)server, ses->serverName, len, - nls_cp); - /* the following line was removed since it didn't work well - with lower cased domain name that passed as an option. - Maybe converting the domain name earlier makes sense */ - /* UniStrupr(domain); */ - - crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash, - (char *)server, 2 * len); - - kfree(server); } - - rc = crypto_shash_final(&ses->server->ntlmssp.sdeschmacmd5->shash, - ses->server->ntlmv2_hash); - calc_exit_1: kfree(user); calc_exit_2: /* BB FIXME what about bytes 24 through 40 of the signing key? 
compare with the NTLM example */ + hmac_md5_final(ses->server->ntlmv2_hash, pctxt); + kfree(pctxt); return rc; } -static int -find_domain_name(struct cifsSesInfo *ses) -{ - int rc = 0; - unsigned int attrsize; - unsigned int type; - unsigned char *blobptr; - struct ntlmssp2_name *attrptr; - - if (ses->server->tiblob) { - blobptr = ses->server->tiblob; - attrptr = (struct ntlmssp2_name *) blobptr; - - while ((type = attrptr->type) != 0) { - blobptr += 2; /* advance attr type */ - attrsize = attrptr->length; - blobptr += 2; /* advance attr size */ - if (type == NTLMSSP_AV_NB_DOMAIN_NAME) { - if (!ses->domainName) { - ses->domainName = - kmalloc(attrptr->length + 1, - GFP_KERNEL); - if (!ses->domainName) - return -ENOMEM; - cifs_from_ucs2(ses->domainName, - (__le16 *)blobptr, - attrptr->length, - attrptr->length, - load_nls_default(), false); - } - } - blobptr += attrsize; /* advance attr value */ - attrptr = (struct ntlmssp2_name *) blobptr; - } - } else { - ses->server->tilen = 2 * sizeof(struct ntlmssp2_name); - ses->server->tiblob = kmalloc(ses->server->tilen, GFP_KERNEL); - if (!ses->server->tiblob) { - ses->server->tilen = 0; - cERROR(1, "Challenge target info allocation failure"); - return -ENOMEM; - } - memset(ses->server->tiblob, 0x0, ses->server->tilen); - attrptr = (struct ntlmssp2_name *) ses->server->tiblob; - attrptr->type = cpu_to_le16(NTLMSSP_DOMAIN_TYPE); - } - - return rc; -} - -static int -CalcNTLMv2_response(const struct TCP_Server_Info *server, - char *v2_session_response) -{ - int rc; - - if (!server->ntlmssp.sdeschmacmd5) { - cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n"); - return -1; - } - - crypto_shash_setkey(server->ntlmssp.hmacmd5, server->ntlmv2_hash, - CIFS_HMAC_MD5_HASH_SIZE); - - rc = crypto_shash_init(&server->ntlmssp.sdeschmacmd5->shash); - if (rc) { - cERROR(1, "CalcNTLMv2_response: could not init hmacmd5"); - return rc; - } - - memcpy(v2_session_response + CIFS_SERVER_CHALLENGE_SIZE, - server->cryptKey, CIFS_SERVER_CHALLENGE_SIZE); - crypto_shash_update(&server->ntlmssp.sdeschmacmd5->shash, - v2_session_response + CIFS_SERVER_CHALLENGE_SIZE, - sizeof(struct ntlmv2_resp) - CIFS_SERVER_CHALLENGE_SIZE); - - if (server->tilen) - crypto_shash_update(&server->ntlmssp.sdeschmacmd5->shash, - server->tiblob, server->tilen); - - rc = crypto_shash_final(&server->ntlmssp.sdeschmacmd5->shash, - v2_session_response); - - return rc; -} - -int -setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf, +void setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf, const struct nls_table *nls_cp) { - int rc = 0; + int rc; struct ntlmv2_resp *buf = (struct ntlmv2_resp *)resp_buf; + struct HMACMD5Context context; buf->blob_signature = cpu_to_le32(0x00000101); buf->reserved = 0; buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); get_random_bytes(&buf->client_chal, sizeof(buf->client_chal)); buf->reserved2 = 0; - - if (!ses->domainName) { - rc = find_domain_name(ses); - if (rc) { - cERROR(1, "could not get domain/server name rc %d", rc); - return rc; - } - } + buf->names[0].type = cpu_to_le16(NTLMSSP_DOMAIN_TYPE); + buf->names[0].length = 0; + buf->names[1].type = 0; + buf->names[1].length = 0; /* calculate buf->ntlmv2_hash */ rc = calc_ntlmv2_hash(ses, nls_cp); - if (rc) { - cERROR(1, "could not get v2 hash rc %d", rc); - return rc; - } - rc = CalcNTLMv2_response(ses->server, resp_buf); - if (rc) { + if (rc) cERROR(1, "could not get v2 hash rc %d", rc); - return rc; - } - - if (!ses->server->ntlmssp.sdeschmacmd5) { - cERROR(1, "calc_ntlmv2_hash: can't 
generate ntlmv2 hash\n"); - return -1; - } - - crypto_shash_setkey(ses->server->ntlmssp.hmacmd5, - ses->server->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); + CalcNTLMv2_response(ses, resp_buf); - rc = crypto_shash_init(&ses->server->ntlmssp.sdeschmacmd5->shash); - if (rc) { - cERROR(1, "setup_ntlmv2_rsp: could not init hmacmd5\n"); - return rc; - } + /* now calculate the MAC key for NTLMv2 */ + hmac_md5_init_limK_to_64(ses->server->ntlmv2_hash, 16, &context); + hmac_md5_update(resp_buf, 16, &context); + hmac_md5_final(ses->server->mac_signing_key.data.ntlmv2.key, &context); - crypto_shash_update(&ses->server->ntlmssp.sdeschmacmd5->shash, - resp_buf, CIFS_HMAC_MD5_HASH_SIZE); - - rc = crypto_shash_final(&ses->server->ntlmssp.sdeschmacmd5->shash, - ses->server->session_key.data.ntlmv2.key); - - memcpy(&ses->server->session_key.data.ntlmv2.resp, resp_buf, - sizeof(struct ntlmv2_resp)); - ses->server->session_key.len = 16 + sizeof(struct ntlmv2_resp); - - return rc; + memcpy(&ses->server->mac_signing_key.data.ntlmv2.resp, resp_buf, + sizeof(struct ntlmv2_resp)); + ses->server->mac_signing_key.len = 16 + sizeof(struct ntlmv2_resp); } -int -calc_seckey(struct TCP_Server_Info *server) -{ - int rc; - unsigned char sec_key[CIFS_NTLMV2_SESSKEY_SIZE]; - struct crypto_blkcipher *tfm_arc4; - struct scatterlist sgin, sgout; - struct blkcipher_desc desc; - - get_random_bytes(sec_key, CIFS_NTLMV2_SESSKEY_SIZE); - - tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", - 0, CRYPTO_ALG_ASYNC); - if (!tfm_arc4 || IS_ERR(tfm_arc4)) { - cERROR(1, "could not allocate " "master crypto API arc4\n"); - return 1; - } - - desc.tfm = tfm_arc4; - - crypto_blkcipher_setkey(tfm_arc4, - server->session_key.data.ntlmv2.key, CIFS_CPHTXT_SIZE); - sg_init_one(&sgin, sec_key, CIFS_CPHTXT_SIZE); - sg_init_one(&sgout, server->ntlmssp.ciphertext, CIFS_CPHTXT_SIZE); - rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, CIFS_CPHTXT_SIZE); - - if (!rc) - memcpy(server->session_key.data.ntlmv2.key, - sec_key, CIFS_NTLMV2_SESSKEY_SIZE); - - crypto_free_blkcipher(tfm_arc4); - - return 0; -} - -void -cifs_crypto_shash_release(struct TCP_Server_Info *server) -{ - if (server->ntlmssp.md5) - crypto_free_shash(server->ntlmssp.md5); - - if (server->ntlmssp.hmacmd5) - crypto_free_shash(server->ntlmssp.hmacmd5); - - kfree(server->ntlmssp.sdeschmacmd5); - - kfree(server->ntlmssp.sdescmd5); -} - -int -cifs_crypto_shash_allocate(struct TCP_Server_Info *server) +void CalcNTLMv2_response(const struct cifsSesInfo *ses, + char *v2_session_response) { - int rc; - unsigned int size; - - server->ntlmssp.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0); - if (!server->ntlmssp.hmacmd5 || - IS_ERR(server->ntlmssp.hmacmd5)) { - cERROR(1, "could not allocate crypto hmacmd5\n"); - return 1; - } - - server->ntlmssp.md5 = crypto_alloc_shash("md5", 0, 0); - if (!server->ntlmssp.md5 || IS_ERR(server->ntlmssp.md5)) { - cERROR(1, "could not allocate crypto md5\n"); - rc = 1; - goto cifs_crypto_shash_allocate_ret1; - } - - size = sizeof(struct shash_desc) + - crypto_shash_descsize(server->ntlmssp.hmacmd5); - server->ntlmssp.sdeschmacmd5 = kmalloc(size, GFP_KERNEL); - if (!server->ntlmssp.sdeschmacmd5) { - cERROR(1, "cifs_crypto_shash_allocate: can't alloc hmacmd5\n"); - rc = -ENOMEM; - goto cifs_crypto_shash_allocate_ret2; - } - server->ntlmssp.sdeschmacmd5->shash.tfm = server->ntlmssp.hmacmd5; - server->ntlmssp.sdeschmacmd5->shash.flags = 0x0; + struct HMACMD5Context context; + /* rest of v2 struct already generated */ + memcpy(v2_session_response + 8, ses->server->cryptKey, 
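Since the diff shows it only piecewise, the NTLMv2 derivation these helpers implement is worth stating once (HMAC-MD5 throughout; a summary, not driver code):

	/*
	 *	nt_hash     = MD4(password)
	 *	ntlmv2_hash = HMAC-MD5(nt_hash, UPPER(username) + domain)
	 *	response    = HMAC-MD5(ntlmv2_hash, server_challenge + blob)
	 *	mac_key     = HMAC-MD5(ntlmv2_hash, response[0..15])
	 *
	 * mac_key then seeds the per-PDU MD5 signing shown earlier.
	 */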
8); + hmac_md5_init_limK_to_64(ses->server->ntlmv2_hash, 16, &context); + hmac_md5_update(v2_session_response+8, + sizeof(struct ntlmv2_resp) - 8, &context); - size = sizeof(struct shash_desc) + - crypto_shash_descsize(server->ntlmssp.md5); - server->ntlmssp.sdescmd5 = kmalloc(size, GFP_KERNEL); - if (!server->ntlmssp.sdescmd5) { - cERROR(1, "cifs_crypto_shash_allocate: can't alloc md5\n"); - rc = -ENOMEM; - goto cifs_crypto_shash_allocate_ret3; - } - server->ntlmssp.sdescmd5->shash.tfm = server->ntlmssp.md5; - server->ntlmssp.sdescmd5->shash.flags = 0x0; - - return 0; - -cifs_crypto_shash_allocate_ret3: - kfree(server->ntlmssp.sdeschmacmd5); - -cifs_crypto_shash_allocate_ret2: - crypto_free_shash(server->ntlmssp.md5); - -cifs_crypto_shash_allocate_ret1: - crypto_free_shash(server->ntlmssp.hmacmd5); - - return rc; + hmac_md5_final(v2_session_response, &context); +/* cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); */ } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index c9d0cfc..0cdfb8c 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -25,9 +25,6 @@ #include #include "cifs_fs_sb.h" #include "cifsacl.h" -#include -#include - /* * The sizes of various internal tables and strings */ @@ -100,7 +97,7 @@ enum protocolEnum { /* Netbios frames protocol not supported at this time */ }; -struct session_key { +struct mac_key { unsigned int len; union { char ntlm[CIFS_SESS_KEY_SIZE + 16]; @@ -123,21 +120,6 @@ struct cifs_cred { struct cifs_ace *aces; }; -struct sdesc { - struct shash_desc shash; - char ctx[]; -}; - -struct ntlmssp_auth { - __u32 client_flags; - __u32 server_flags; - unsigned char ciphertext[CIFS_CPHTXT_SIZE]; - struct crypto_shash *hmacmd5; - struct crypto_shash *md5; - struct sdesc *sdeschmacmd5; - struct sdesc *sdescmd5; -}; - /* ***************************************************************** * Except the CIFS PDUs themselves all the @@ -200,14 +182,11 @@ struct TCP_Server_Info { /* 16th byte of RFC1001 workstation name is always null */ char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; __u32 sequence_number; /* needed for CIFS PDU signature */ - struct session_key session_key; + struct mac_key mac_signing_key; char ntlmv2_hash[16]; unsigned long lstrp; /* when we got last response from this server */ u16 dialect; /* dialect index that server chose */ /* extended security flavors that server supports */ - unsigned int tilen; /* length of the target info blob */ - unsigned char *tiblob; /* target info blob in challenge response */ - struct ntlmssp_auth ntlmssp; /* various keys, ciphers, flags */ bool sec_kerberos; /* supports plain Kerberos */ bool sec_mskerberos; /* supports legacy MS Kerberos */ bool sec_kerberosu2u; /* supports U2U Kerberos */ diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 320e0fd..14d036d 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -134,12 +134,6 @@ * Size of the session key (crypto key encrypted with the password */ #define CIFS_SESS_KEY_SIZE (24) -#define CIFS_CLIENT_CHALLENGE_SIZE (8) -#define CIFS_SERVER_CHALLENGE_SIZE (8) -#define CIFS_HMAC_MD5_HASH_SIZE (16) -#define CIFS_CPHTXT_SIZE (16) -#define CIFS_NTLMV2_SESSKEY_SIZE (16) -#define CIFS_NTHASH_SIZE (16) /* * Maximum user name length @@ -669,6 +663,7 @@ struct ntlmv2_resp { __le64 time; __u64 client_chal; /* random */ __u32 reserved2; + struct ntlmssp2_name names[2]; /* array of name entries could follow ending in minimum 4 byte struct */ } __attribute__((packed)); diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 1378d91..1d60c65 
100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -87,8 +87,9 @@ extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); extern int decode_negTokenInit(unsigned char *security_blob, int length, struct TCP_Server_Info *server); extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len); +extern int cifs_set_port(struct sockaddr *addr, const unsigned short int port); extern int cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len, - unsigned short int port); + const unsigned short int port); extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr); extern void header_assemble(struct smb_hdr *, char /* command */ , const struct cifsTconInfo *, int /* length of @@ -361,15 +362,13 @@ extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *); extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *, __u32 *); extern int cifs_verify_signature(struct smb_hdr *, - struct TCP_Server_Info *server, + const struct mac_key *mac_key, __u32 expected_sequence_number); -extern int cifs_calculate_session_key(struct session_key *key, const char *rn, +extern int cifs_calculate_mac_key(struct mac_key *key, const char *rn, const char *pass); -extern int setup_ntlmv2_rsp(struct cifsSesInfo *, char *, +extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *); +extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *, const struct nls_table *); -extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *); -extern void cifs_crypto_shash_release(struct TCP_Server_Info *); -extern int calc_seckey(struct TCP_Server_Info *); #ifdef CONFIG_CIFS_WEAK_PW_HASH extern void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt, char *lnm_session_key); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 4bda920..c65c341 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -604,14 +604,11 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) else rc = -EINVAL; - if (server->secType == Kerberos) { - if (!server->sec_kerberos && - !server->sec_mskerberos) - rc = -EOPNOTSUPP; - } else if (server->secType == RawNTLMSSP) { - if (!server->sec_ntlmssp) - rc = -EOPNOTSUPP; - } else + if (server->sec_kerberos || server->sec_mskerberos) + server->secType = Kerberos; + else if (server->sec_ntlmssp) + server->secType = RawNTLMSSP; + else rc = -EOPNOTSUPP; } } else diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index ec0ea4a..88c84a3 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -400,7 +400,9 @@ incomplete_rcv: cFYI(1, "call to reconnect done"); csocket = server->ssocket; continue; - } else if ((length == -ERESTARTSYS) || (length == -EAGAIN)) { + } else if (length == -ERESTARTSYS || + length == -EAGAIN || + length == -EINTR) { msleep(1); /* minimum sleep to prevent looping allowing socket to clear and app threads to set tcpStatus CifsNeedReconnect if server hung */ @@ -414,18 +416,6 @@ incomplete_rcv: } else continue; } else if (length <= 0) { - if (server->tcpStatus == CifsNew) { - cFYI(1, "tcp session abend after SMBnegprot"); - /* some servers kill the TCP session rather than - returning an SMB negprot error, in which - case reconnecting here is not going to help, - and so simply return error to mount */ - break; - } - if (!try_to_freeze() && (length == -EINTR)) { - cFYI(1, "cifsd thread killed"); - break; - } cFYI(1, "Reconnect after unexpected peek error %d", length); cifs_reconnect(server); @@ -466,27 +456,19 @@ incomplete_rcv: an error on SMB negprot response */ 
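The connect.c changes converge on one rule for the demultiplex thread: kernel_recvmsg() returns of -ERESTARTSYS, -EAGAIN and now -EINTR are transient, so back off briefly and retry instead of tearing the session down. The core of the pattern, lifted out of the loop:

	length = kernel_recvmsg(csocket, &smb_msg, &iov, 1, to_read, 0);
	if (length == -ERESTARTSYS || length == -EAGAIN ||
	    length == -EINTR) {
		msleep(1);	/* let the socket clear; other threads may
				 * flag the server for reconnect meanwhile */
		continue;
	}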
cFYI(1, "Negative RFC1002 Session Response Error 0x%x)", pdu_length); - if (server->tcpStatus == CifsNew) { - /* if nack on negprot (rather than - ret of smb negprot error) reconnecting - not going to help, ret error to mount */ - break; - } else { - /* give server a second to - clean up before reconnect attempt */ - msleep(1000); - /* always try 445 first on reconnect - since we get NACK on some if we ever - connected to port 139 (the NACK is - since we do not begin with RFC1001 - session initialize frame) */ - server->addr.sockAddr.sin_port = - htons(CIFS_PORT); - cifs_reconnect(server); - csocket = server->ssocket; - wake_up(&server->response_q); - continue; - } + /* give server a second to clean up */ + msleep(1000); + /* always try 445 first on reconnect since we get NACK + * on some if we ever connected to port 139 (the NACK + * is since we do not begin with RFC1001 session + * initialize frame) + */ + cifs_set_port((struct sockaddr *) + &server->addr.sockAddr, CIFS_PORT); + cifs_reconnect(server); + csocket = server->ssocket; + wake_up(&server->response_q); + continue; } else if (temp != (char) 0) { cERROR(1, "Unknown RFC 1002 frame"); cifs_dump_mem(" Received Data: ", (char *)smb_buffer, @@ -522,8 +504,7 @@ incomplete_rcv: total_read += length) { length = kernel_recvmsg(csocket, &smb_msg, &iov, 1, pdu_length - total_read, 0); - if ((server->tcpStatus == CifsExiting) || - (length == -EINTR)) { + if (server->tcpStatus == CifsExiting) { /* then will exit */ reconnect = 2; break; @@ -534,8 +515,9 @@ incomplete_rcv: /* Now we will reread sock */ reconnect = 1; break; - } else if ((length == -ERESTARTSYS) || - (length == -EAGAIN)) { + } else if (length == -ERESTARTSYS || + length == -EAGAIN || + length == -EINTR) { msleep(1); /* minimum sleep to prevent looping, allowing socket to clear and app threads to set tcpStatus @@ -1708,7 +1690,6 @@ cifs_put_smb_ses(struct cifsSesInfo *ses) CIFSSMBLogoff(xid, ses); _FreeXid(xid); } - cifs_crypto_shash_release(server); sesInfoFree(ses); cifs_put_tcp_session(server); } @@ -1725,9 +1706,6 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) if (ses) { cFYI(1, "Existing smb sess found (status=%d)", ses->status); - /* existing SMB ses has a server reference already */ - cifs_put_tcp_session(server); - mutex_lock(&ses->session_mutex); rc = cifs_negotiate_protocol(xid, ses); if (rc) { @@ -1750,6 +1728,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) } } mutex_unlock(&ses->session_mutex); + + /* existing SMB ses has a server reference already */ + cifs_put_tcp_session(server); FreeXid(xid); return ses; } @@ -1788,23 +1769,13 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) ses->linux_uid = volume_info->linux_uid; ses->overrideSecFlg = volume_info->secFlg; - rc = cifs_crypto_shash_allocate(server); - if (rc) { - cERROR(1, "could not setup hash structures rc %d", rc); - goto get_ses_fail; - } - server->tilen = 0; - server->tiblob = NULL; - mutex_lock(&ses->session_mutex); rc = cifs_negotiate_protocol(xid, ses); if (!rc) rc = cifs_setup_session(xid, ses, volume_info->local_nls); mutex_unlock(&ses->session_mutex); - if (rc) { - cifs_crypto_shash_release(ses->server); + if (rc) goto get_ses_fail; - } /* success, put it on the list */ write_lock(&cifs_tcp_ses_lock); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 86a164f..93f77d4 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1462,29 +1462,18 @@ int cifs_rename(struct inode *source_dir, struct dentry 
*source_dentry, { char *fromName = NULL; char *toName = NULL; - struct cifs_sb_info *cifs_sb_source; - struct cifs_sb_info *cifs_sb_target; + struct cifs_sb_info *cifs_sb; struct cifsTconInfo *tcon; FILE_UNIX_BASIC_INFO *info_buf_source = NULL; FILE_UNIX_BASIC_INFO *info_buf_target; int xid, rc, tmprc; - cifs_sb_target = CIFS_SB(target_dir->i_sb); - cifs_sb_source = CIFS_SB(source_dir->i_sb); - tcon = cifs_sb_source->tcon; + cifs_sb = CIFS_SB(source_dir->i_sb); + tcon = cifs_sb->tcon; xid = GetXid(); /* - * BB: this might be allowed if same server, but different share. - * Consider adding support for this - */ - if (tcon != cifs_sb_target->tcon) { - rc = -EXDEV; - goto cifs_rename_exit; - } - - /* * we already have the rename sem so we do not need to * grab it again here to protect the path integrity */ @@ -1519,17 +1508,16 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, info_buf_target = info_buf_source + 1; tmprc = CIFSSMBUnixQPathInfo(xid, tcon, fromName, info_buf_source, - cifs_sb_source->local_nls, - cifs_sb_source->mnt_cifs_flags & + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (tmprc != 0) goto unlink_target; - tmprc = CIFSSMBUnixQPathInfo(xid, tcon, - toName, info_buf_target, - cifs_sb_target->local_nls, - /* remap based on source sb */ - cifs_sb_source->mnt_cifs_flags & + tmprc = CIFSSMBUnixQPathInfo(xid, tcon, toName, + info_buf_target, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (tmprc == 0 && (info_buf_source->UniqueId == diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index f978511..9aad47a 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -206,26 +206,30 @@ cifs_convert_address(struct sockaddr *dst, const char *src, int len) } int -cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len, - const unsigned short int port) +cifs_set_port(struct sockaddr *addr, const unsigned short int port) { - if (!cifs_convert_address(dst, src, len)) - return 0; - - switch (dst->sa_family) { + switch (addr->sa_family) { case AF_INET: - ((struct sockaddr_in *)dst)->sin_port = htons(port); + ((struct sockaddr_in *)addr)->sin_port = htons(port); break; case AF_INET6: - ((struct sockaddr_in6 *)dst)->sin6_port = htons(port); + ((struct sockaddr_in6 *)addr)->sin6_port = htons(port); break; default: return 0; } - return 1; } +int +cifs_fill_sockaddr(struct sockaddr *dst, const char *src, int len, + const unsigned short int port) +{ + if (!cifs_convert_address(dst, src, len)) + return 0; + return cifs_set_port(dst, port); +} + /***************************************************************************** convert a NT status code to a dos class/code *****************************************************************************/ diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h index 1db0f07..49c9a4e 100644 --- a/fs/cifs/ntlmssp.h +++ b/fs/cifs/ntlmssp.h @@ -61,19 +61,6 @@ #define NTLMSSP_NEGOTIATE_KEY_XCH 0x40000000 #define NTLMSSP_NEGOTIATE_56 0x80000000 -/* Define AV Pair Field IDs */ -#define NTLMSSP_AV_EOL 0 -#define NTLMSSP_AV_NB_COMPUTER_NAME 1 -#define NTLMSSP_AV_NB_DOMAIN_NAME 2 -#define NTLMSSP_AV_DNS_COMPUTER_NAME 3 -#define NTLMSSP_AV_DNS_DOMAIN_NAME 4 -#define NTLMSSP_AV_DNS_TREE_NAME 5 -#define NTLMSSP_AV_FLAGS 6 -#define NTLMSSP_AV_TIMESTAMP 7 -#define NTLMSSP_AV_RESTRICTION 8 -#define NTLMSSP_AV_TARGET_NAME 9 -#define NTLMSSP_AV_CHANNEL_BINDINGS 10 - /* Although typedefs are not commonly used for structure definitions */ /* in the Linux kernel, in this particular case 
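Splitting cifs_set_port() out of cifs_fill_sockaddr() (netmisc.c above) lets the reconnect path reset the port on an address it already holds instead of re-parsing the string. The helper is a plain address-family dispatch; sketched here with a hypothetical name:

	#include <linux/in.h>
	#include <linux/in6.h>

	static int example_set_port(struct sockaddr *addr, unsigned short port)
	{
		switch (addr->sa_family) {
		case AF_INET:
			((struct sockaddr_in *)addr)->sin_port = htons(port);
			break;
		case AF_INET6:
			((struct sockaddr_in6 *)addr)->sin6_port = htons(port);
			break;
		default:
			return 0;	/* unknown family: caller treats as failure */
		}
		return 1;
	}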
they are useful */ /* to more closely match the standards document for NTLMSSP from */ diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 795095f..0a57cb7 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -383,9 +383,6 @@ static int decode_ascii_ssetup(char **pbcc_area, int bleft, static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, struct cifsSesInfo *ses) { - unsigned int tioffset; /* challeng message target info area */ - unsigned int tilen; /* challeng message target info area length */ - CHALLENGE_MESSAGE *pblob = (CHALLENGE_MESSAGE *)bcc_ptr; if (blob_len < sizeof(CHALLENGE_MESSAGE)) { @@ -408,20 +405,6 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, /* BB spec says that if AvId field of MsvAvTimestamp is populated then we must set the MIC field of the AUTHENTICATE_MESSAGE */ - ses->server->ntlmssp.server_flags = le32_to_cpu(pblob->NegotiateFlags); - - tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset); - tilen = cpu_to_le16(pblob->TargetInfoArray.Length); - ses->server->tilen = tilen; - if (tilen) { - ses->server->tiblob = kmalloc(tilen, GFP_KERNEL); - if (!ses->server->tiblob) { - cERROR(1, "Challenge target info allocation failure"); - return -ENOMEM; - } - memcpy(ses->server->tiblob, bcc_ptr + tioffset, tilen); - } - return 0; } @@ -442,13 +425,12 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, /* BB is NTLMV2 session security format easier to use here? */ flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | - NTLMSSP_NEGOTIATE_NTLM; + NTLMSSP_NEGOTIATE_NT_ONLY | NTLMSSP_NEGOTIATE_NTLM; if (ses->server->secMode & - (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { - flags |= NTLMSSP_NEGOTIATE_SIGN | - NTLMSSP_NEGOTIATE_KEY_XCH | - NTLMSSP_NEGOTIATE_EXTENDED_SEC; - } + (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) + flags |= NTLMSSP_NEGOTIATE_SIGN; + if (ses->server->secMode & SECMODE_SIGN_REQUIRED) + flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN; sec_blob->NegotiateFlags |= cpu_to_le32(flags); @@ -469,12 +451,10 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, struct cifsSesInfo *ses, const struct nls_table *nls_cp, bool first) { - int rc; - unsigned int size; AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer; __u32 flags; unsigned char *tmp; - struct ntlmv2_resp ntlmv2_response = {}; + char ntlm_session_key[CIFS_SESS_KEY_SIZE]; memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8); sec_blob->MessageType = NtLmAuthenticate; @@ -497,25 +477,19 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, sec_blob->LmChallengeResponse.Length = 0; sec_blob->LmChallengeResponse.MaximumLength = 0; - sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer); - rc = setup_ntlmv2_rsp(ses, (char *)&ntlmv2_response, nls_cp); - if (rc) { - cERROR(1, "error rc: %d during ntlmssp ntlmv2 setup", rc); - goto setup_ntlmv2_ret; - } - size = sizeof(struct ntlmv2_resp); - memcpy(tmp, (char *)&ntlmv2_response, size); - tmp += size; - if (ses->server->tilen > 0) { - memcpy(tmp, ses->server->tiblob, ses->server->tilen); - tmp += ses->server->tilen; - } else - ses->server->tilen = 0; + /* calculate session key, BB what about adding similar ntlmv2 path? 
*/ + SMBNTencrypt(ses->password, ses->server->cryptKey, ntlm_session_key); + if (first) + cifs_calculate_mac_key(&ses->server->mac_signing_key, + ntlm_session_key, ses->password); - sec_blob->NtChallengeResponse.Length = cpu_to_le16(size + - ses->server->tilen); + memcpy(tmp, ntlm_session_key, CIFS_SESS_KEY_SIZE); + sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->NtChallengeResponse.Length = cpu_to_le16(CIFS_SESS_KEY_SIZE); sec_blob->NtChallengeResponse.MaximumLength = - cpu_to_le16(size + ses->server->tilen); + cpu_to_le16(CIFS_SESS_KEY_SIZE); + + tmp += CIFS_SESS_KEY_SIZE; if (ses->domainName == NULL) { sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); @@ -527,6 +501,7 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, len = cifs_strtoUCS((__le16 *)tmp, ses->domainName, MAX_USERNAME_SIZE, nls_cp); len *= 2; /* unicode is 2 bytes each */ + len += 2; /* trailing null */ sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); sec_blob->DomainName.Length = cpu_to_le16(len); sec_blob->DomainName.MaximumLength = cpu_to_le16(len); @@ -543,6 +518,7 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, len = cifs_strtoUCS((__le16 *)tmp, ses->userName, MAX_USERNAME_SIZE, nls_cp); len *= 2; /* unicode is 2 bytes each */ + len += 2; /* trailing null */ sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); sec_blob->UserName.Length = cpu_to_le16(len); sec_blob->UserName.MaximumLength = cpu_to_le16(len); @@ -554,26 +530,9 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, sec_blob->WorkstationName.MaximumLength = 0; tmp += 2; - if ((ses->server->ntlmssp.server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) && - !calc_seckey(ses->server)) { - memcpy(tmp, ses->server->ntlmssp.ciphertext, CIFS_CPHTXT_SIZE); - sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); - sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE); - sec_blob->SessionKey.MaximumLength = - cpu_to_le16(CIFS_CPHTXT_SIZE); - tmp += CIFS_CPHTXT_SIZE; - } else { - sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); - sec_blob->SessionKey.Length = 0; - sec_blob->SessionKey.MaximumLength = 0; - } - - ses->server->sequence_number = 0; - -setup_ntlmv2_ret: - if (ses->server->tilen > 0) - kfree(ses->server->tiblob); - + sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); + sec_blob->SessionKey.Length = 0; + sec_blob->SessionKey.MaximumLength = 0; return tmp - pbuffer; } @@ -587,14 +546,15 @@ static void setup_ntlmssp_neg_req(SESSION_SETUP_ANDX *pSMB, return; } -static int setup_ntlmssp_auth_req(char *ntlmsspblob, +static int setup_ntlmssp_auth_req(SESSION_SETUP_ANDX *pSMB, struct cifsSesInfo *ses, const struct nls_table *nls, bool first_time) { int bloblen; - bloblen = build_ntlmssp_auth_blob(ntlmsspblob, ses, nls, + bloblen = build_ntlmssp_auth_blob(&pSMB->req.SecurityBlob[0], ses, nls, first_time); + pSMB->req.SecurityBlobLength = cpu_to_le16(bloblen); return bloblen; } @@ -730,7 +690,7 @@ ssetup_ntlmssp_authenticate: if (first_time) /* should this be moved into common code with similar ntlmv2 path? 
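Most of build_ntlmssp_auth_blob() is bookkeeping: each variable-length NTLMSSP field is described by a (Length, MaximumLength, BufferOffset) triplet pointing into the blob, with tmp as the append cursor. One field in isolation, generic names:

	sec_buf->BufferOffset = cpu_to_le32(tmp - pbuffer);
	sec_buf->Length = cpu_to_le16(len);
	sec_buf->MaximumLength = cpu_to_le16(len);
	memcpy(tmp, payload, len);
	tmp += len;	/* next field appends after this one */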
*/ - cifs_calculate_session_key(&ses->server->session_key, + cifs_calculate_mac_key(&ses->server->mac_signing_key, ntlm_session_key, ses->password); /* copy session key */ @@ -769,21 +729,12 @@ ssetup_ntlmssp_authenticate: cpu_to_le16(sizeof(struct ntlmv2_resp)); /* calculate session key */ - rc = setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp); - if (rc) { - kfree(v2_sess_key); - goto ssetup_exit; - } + setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp); /* FIXME: calculate MAC key */ memcpy(bcc_ptr, (char *)v2_sess_key, sizeof(struct ntlmv2_resp)); bcc_ptr += sizeof(struct ntlmv2_resp); kfree(v2_sess_key); - if (ses->server->tilen > 0) { - memcpy(bcc_ptr, ses->server->tiblob, - ses->server->tilen); - bcc_ptr += ses->server->tilen; - } if (ses->capabilities & CAP_UNICODE) { if (iov[0].iov_len % 2) { *bcc_ptr = 0; @@ -814,15 +765,15 @@ ssetup_ntlmssp_authenticate: } /* bail out if key is too long */ if (msg->sesskey_len > - sizeof(ses->server->session_key.data.krb5)) { + sizeof(ses->server->mac_signing_key.data.krb5)) { cERROR(1, "Kerberos signing key too long (%u bytes)", msg->sesskey_len); rc = -EOVERFLOW; goto ssetup_exit; } if (first_time) { - ses->server->session_key.len = msg->sesskey_len; - memcpy(ses->server->session_key.data.krb5, + ses->server->mac_signing_key.len = msg->sesskey_len; + memcpy(ses->server->mac_signing_key.data.krb5, msg->data, msg->sesskey_len); } pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; @@ -864,28 +815,12 @@ ssetup_ntlmssp_authenticate: if (phase == NtLmNegotiate) { setup_ntlmssp_neg_req(pSMB, ses); iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE); - iov[1].iov_base = &pSMB->req.SecurityBlob[0]; } else if (phase == NtLmAuthenticate) { int blob_len; - char *ntlmsspblob; - - ntlmsspblob = kmalloc(5 * - sizeof(struct _AUTHENTICATE_MESSAGE), - GFP_KERNEL); - if (!ntlmsspblob) { - cERROR(1, "Can't allocate NTLMSSP"); - rc = -ENOMEM; - goto ssetup_exit; - } - - blob_len = setup_ntlmssp_auth_req(ntlmsspblob, - ses, - nls_cp, - first_time); + blob_len = setup_ntlmssp_auth_req(pSMB, ses, + nls_cp, + first_time); iov[1].iov_len = blob_len; - iov[1].iov_base = ntlmsspblob; - pSMB->req.SecurityBlobLength = - cpu_to_le16(blob_len); /* Make sure that we tell the server that we are using the uid that it just gave us back on the response (challenge) */ @@ -895,6 +830,7 @@ ssetup_ntlmssp_authenticate: rc = -ENOSYS; goto ssetup_exit; } + iov[1].iov_base = &pSMB->req.SecurityBlob[0]; /* unicode strings must be word aligned */ if ((iov[0].iov_len + iov[1].iov_len) % 2) { *bcc_ptr = 0; diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index e0588cd..82f78c4 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -543,7 +543,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))) { rc = cifs_verify_signature(midQ->resp_buf, - ses->server, + &ses->server->mac_signing_key, midQ->sequence_number+1); if (rc) { cERROR(1, "Unexpected SMB signature"); @@ -731,7 +731,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))) { rc = cifs_verify_signature(out_buf, - ses->server, + &ses->server->mac_signing_key, midQ->sequence_number+1); if (rc) { cERROR(1, "Unexpected SMB signature"); @@ -981,7 +981,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))) { rc = cifs_verify_signature(out_buf, - ses->server, + &ses->server->mac_signing_key, 
midQ->sequence_number+1); if (rc) { cERROR(1, "Unexpected SMB signature"); diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index de89645..116af75 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -184,8 +184,8 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf, } /* adjust outsize. is this useful ?? */ - req->uc_outSize = nbytes; - req->uc_flags |= REQ_WRITE; + req->uc_outSize = nbytes; + req->uc_flags |= CODA_REQ_WRITE; count = nbytes; /* Convert filedescriptor into a file handle */ diff --git a/fs/direct-io.c b/fs/direct-io.c index 51f270b..48d74c7 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -634,7 +634,7 @@ static int dio_send_cur_page(struct dio *dio) int ret = 0; if (dio->bio) { - loff_t cur_offset = dio->block_in_file << dio->blkbits; + loff_t cur_offset = dio->cur_page_fs_offset; loff_t bio_next_offset = dio->logical_offset_in_bio + dio->bio->bi_size; @@ -659,7 +659,7 @@ static int dio_send_cur_page(struct dio *dio) * Submit now if the underlying fs is about to perform a * metadata read */ - if (dio->boundary) + else if (dio->boundary) dio_bio_submit(dio); } diff --git a/fs/exec.c b/fs/exec.c index 2d94552..828dd24 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -376,6 +376,9 @@ static int count(const char __user * const __user * argv, int max) argv++; if (i++ >= max) return -E2BIG; + + if (fatal_signal_pending(current)) + return -ERESTARTNOHAND; cond_resched(); } } @@ -419,6 +422,12 @@ static int copy_strings(int argc, const char __user *const __user *argv, while (len > 0) { int offset, bytes_to_copy; + if (fatal_signal_pending(current)) { + ret = -ERESTARTNOHAND; + goto out; + } + cond_resched(); + offset = pos % PAGE_SIZE; if (offset == 0) offset = PAGE_SIZE; @@ -594,6 +603,11 @@ int setup_arg_pages(struct linux_binprm *bprm, #else stack_top = arch_align_stack(stack_top); stack_top = PAGE_ALIGN(stack_top); + + if (unlikely(stack_top < mmap_min_addr) || + unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr)) + return -ENOMEM; + stack_shift = vma->vm_end - stack_top; bprm->p -= stack_shift; diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index d7e9f74..09b13bb 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c @@ -90,7 +90,6 @@ int ext3_sync_file(struct file *file, int datasync) * storage */ if (needs_barrier) - blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, - BLKDEV_IFL_WAIT); + blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); return ret; } diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 592adf2..3f3ff5e 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -128,10 +128,9 @@ int ext4_sync_file(struct file *file, int datasync) (journal->j_fs_dev != journal->j_dev) && (journal->j_flags & JBD2_BARRIER)) blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, - NULL, BLKDEV_IFL_WAIT); + NULL); ret = jbd2_log_wait_commit(journal, commit_tid); } else if (journal->j_flags & JBD2_BARRIER) - blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, - BLKDEV_IFL_WAIT); + blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); return ret; } diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 4b4ad4b..19aa0d4 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2566,7 +2566,7 @@ static inline void ext4_issue_discard(struct super_block *sb, discard_block = block + ext4_group_first_block_no(sb, block_group); trace_ext4_discard_blocks(sb, (unsigned long long) discard_block, count); - ret = sb_issue_discard(sb, discard_block, count); + ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); if (ret == 
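The exec.c hunks make the argv walk killable: count() and copy_strings() can iterate for a very long time on user-controlled input, so they now poll for fatal signals and offer to reschedule. The loop skeleton they both adopt:

	for (;;) {
		if (fatal_signal_pending(current))
			return -ERESTARTNOHAND;	/* honor SIGKILL promptly */
		cond_resched();
		/* ... process one argv entry ... */
	}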
EOPNOTSUPP) { ext4_warning(sb, "discard not supported, disabling"); clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 81184d3..b47d2c9 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -577,7 +577,8 @@ int fat_free_clusters(struct inode *inode, int cluster) sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl), - nr_clus * sbi->sec_per_clus); + nr_clus * sbi->sec_per_clus, + GFP_NOFS, 0); first_cl = cluster; } diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 1736f23..970e682 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -255,10 +255,7 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) for (i = 0; i < nr_bhs; i++) { wait_on_buffer(bhs[i]); - if (buffer_eopnotsupp(bhs[i])) { - clear_buffer_eopnotsupp(bhs[i]); - err = -EOPNOTSUPP; - } else if (!err && !buffer_uptodate(bhs[i])) + if (!err && !buffer_uptodate(bhs[i])) err = -EIO; } return err; diff --git a/fs/fcntl.c b/fs/fcntl.c index 6769fd0..f8cc34f 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -769,11 +769,15 @@ EXPORT_SYMBOL(kill_fasync); static int __init fcntl_init(void) { - /* please add new bits here to ensure allocation uniqueness */ - BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( + /* + * Please add new bits here to ensure allocation uniqueness. + * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY + * is defined as O_NONBLOCK on some platforms and not on others. + */ + BUILD_BUG_ON(18 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( O_RDONLY | O_WRONLY | O_RDWR | O_CREAT | O_EXCL | O_NOCTTY | - O_TRUNC | O_APPEND | O_NONBLOCK | + O_TRUNC | O_APPEND | /* O_NONBLOCK | */ __O_SYNC | O_DSYNC | FASYNC | O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | O_NOATIME | O_CLOEXEC | diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 7d9d06b..5581122 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -52,8 +52,6 @@ struct wb_writeback_work { #define CREATE_TRACE_POINTS #include -#define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info) - /* * We don't actually have pdflush, but this one is exported though /proc... */ @@ -71,6 +69,27 @@ int writeback_in_progress(struct backing_dev_info *bdi) return test_bit(BDI_writeback_running, &bdi->state); } +static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info; + + /* + * For inodes on standard filesystems, we use superblock's bdi. For + * inodes on virtual filesystems, we want to use inode mapping's bdi + * because they can possibly point to something useful (think about + * block_dev filesystem). + */ + if (sb->s_bdi && sb->s_bdi != &noop_backing_dev_info) { + /* Some device inodes could play dirty tricks. Catch them... 
*/ + WARN(bdi != sb->s_bdi && bdi_cap_writeback_dirty(bdi), + "Dirtiable inode bdi %s != sb bdi %s\n", + bdi->name, sb->s_bdi->name); + return sb->s_bdi; + } + return bdi; +} + static void bdi_queue_work(struct backing_dev_info *bdi, struct wb_writeback_work *work) { @@ -808,7 +827,7 @@ int bdi_writeback_thread(void *data) wb->last_active = jiffies; set_current_state(TASK_INTERRUPTIBLE); - if (!list_empty(&bdi->work_list)) { + if (!list_empty(&bdi->work_list) || kthread_should_stop()) { __set_current_state(TASK_RUNNING); continue; } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 69ad053..d367af1 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -276,7 +276,7 @@ static void flush_bg_queue(struct fuse_conn *fc) * Called with fc->lock, unlocks it */ static void request_end(struct fuse_conn *fc, struct fuse_req *req) -__releases(&fc->lock) +__releases(fc->lock) { void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; req->end = NULL; @@ -306,8 +306,8 @@ __releases(&fc->lock) static void wait_answer_interruptible(struct fuse_conn *fc, struct fuse_req *req) -__releases(&fc->lock) -__acquires(&fc->lock) +__releases(fc->lock) +__acquires(fc->lock) { if (signal_pending(current)) return; @@ -325,8 +325,8 @@ static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req) } static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) -__releases(&fc->lock) -__acquires(&fc->lock) +__releases(fc->lock) +__acquires(fc->lock) { if (!fc->no_interrupt) { /* Any signal may interrupt this */ @@ -905,8 +905,8 @@ static int request_pending(struct fuse_conn *fc) /* Wait until a request is available on the pending list */ static void request_wait(struct fuse_conn *fc) -__releases(&fc->lock) -__acquires(&fc->lock) +__releases(fc->lock) +__acquires(fc->lock) { DECLARE_WAITQUEUE(wait, current); @@ -934,7 +934,7 @@ __acquires(&fc->lock) */ static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs, size_t nbytes, struct fuse_req *req) -__releases(&fc->lock) +__releases(fc->lock) { struct fuse_in_header ih; struct fuse_interrupt_in arg; @@ -1720,8 +1720,8 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait) * This function releases and reacquires fc->lock */ static void end_requests(struct fuse_conn *fc, struct list_head *head) -__releases(&fc->lock) -__acquires(&fc->lock) +__releases(fc->lock) +__acquires(fc->lock) { while (!list_empty(head)) { struct fuse_req *req; @@ -1744,8 +1744,8 @@ __acquires(&fc->lock) * locked). */ static void end_io_requests(struct fuse_conn *fc) -__releases(&fc->lock) -__acquires(&fc->lock) +__releases(fc->lock) +__acquires(fc->lock) { while (!list_empty(&fc->io)) { struct fuse_req *req = @@ -1769,6 +1769,16 @@ __acquires(&fc->lock) } } +static void end_queued_requests(struct fuse_conn *fc) +__releases(fc->lock) +__acquires(fc->lock) +{ + fc->max_background = UINT_MAX; + flush_bg_queue(fc); + end_requests(fc, &fc->pending); + end_requests(fc, &fc->processing); +} + /* * Abort all requests. 
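The fuse churn from __releases(&fc->lock) to __releases(fc->lock) concerns sparse's context checking: the annotations declare that a function is entered with the lock held and returns without it (or re-takes it), and the patch normalizes how the lock expression is spelled. A sketch of an annotated function:

	static void example_drop_lock_and_wait(struct fuse_conn *fc)
	__releases(fc->lock)
	__acquires(fc->lock)
	{
		spin_unlock(&fc->lock);
		/* ... sleep, copy data, anything that must not hold it ... */
		spin_lock(&fc->lock);
	}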
* @@ -1795,8 +1805,7 @@ void fuse_abort_conn(struct fuse_conn *fc) fc->connected = 0; fc->blocked = 0; end_io_requests(fc); - end_requests(fc, &fc->pending); - end_requests(fc, &fc->processing); + end_queued_requests(fc); wake_up_all(&fc->waitq); wake_up_all(&fc->blocked_waitq); kill_fasync(&fc->fasync, SIGIO, POLL_IN); @@ -1811,8 +1820,9 @@ int fuse_dev_release(struct inode *inode, struct file *file) if (fc) { spin_lock(&fc->lock); fc->connected = 0; - end_requests(fc, &fc->pending); - end_requests(fc, &fc->processing); + fc->blocked = 0; + end_queued_requests(fc); + wake_up_all(&fc->blocked_waitq); spin_unlock(&fc->lock); fuse_conn_put(fc); } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 147c1f7..c822458 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1144,8 +1144,8 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req) /* Called under fc->lock, may release and reacquire it */ static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req) -__releases(&fc->lock) -__acquires(&fc->lock) +__releases(fc->lock) +__acquires(fc->lock) { struct fuse_inode *fi = get_fuse_inode(req->inode); loff_t size = i_size_read(req->inode); @@ -1183,8 +1183,8 @@ __acquires(&fc->lock) * Called with fc->lock */ void fuse_flush_writepages(struct inode *inode) -__releases(&fc->lock) -__acquires(&fc->lock) +__releases(fc->lock) +__acquires(fc->lock) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index cde1248..eb01f35 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -592,22 +592,13 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) lh->lh_hash = cpu_to_be32(hash); bh->b_end_io = end_buffer_write_sync; - if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) - goto skip_barrier; get_bh(bh); - submit_bh(WRITE_BARRIER | REQ_META, bh); - wait_on_buffer(bh); - if (buffer_eopnotsupp(bh)) { - clear_buffer_eopnotsupp(bh); - set_buffer_uptodate(bh); - fs_info(sdp, "barrier sync failed - disabling barriers\n"); - set_bit(SDF_NOBARRIERS, &sdp->sd_flags); - lock_buffer(bh); -skip_barrier: - get_bh(bh); + if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) submit_bh(WRITE_SYNC | REQ_META, bh); - wait_on_buffer(bh); - } + else + submit_bh(WRITE_FLUSH_FUA | REQ_META, bh); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) gfs2_io_error_bh(sdp, bh); brelse(bh); @@ -932,7 +923,7 @@ int gfs2_logd(void *data) do { prepare_to_wait(&sdp->sd_logd_waitq, &wait, - TASK_UNINTERRUPTIBLE); + TASK_INTERRUPTIBLE); if (!gfs2_ail_flush_reqd(sdp) && !gfs2_jrnl_flush_reqd(sdp) && !kthread_should_stop()) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 171a744..38b3ea1 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -854,8 +854,7 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, if ((start + nr_sects) != blk) { rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, - BLKDEV_IFL_WAIT | - BLKDEV_IFL_BARRIER); + 0); if (rv) goto fail; nr_sects = 0; @@ -869,8 +868,7 @@ start_new_extent: } } if (nr_sects) { - rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, - BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); + rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0); if (rv) goto fail; } diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 95d8c11..85a6883 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -137,34 +137,10 @@ static int journal_write_commit_record(journal_t *journal, JBUFFER_TRACE(descriptor, "write commit block"); set_buffer_dirty(bh); - if 
(journal->j_flags & JFS_BARRIER) { - ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_BARRIER); - - /* - * Is it possible for another commit to fail at roughly - * the same time as this one? If so, we don't want to - * trust the barrier flag in the super, but instead want - * to remember if we sent a barrier request - */ - if (ret == -EOPNOTSUPP) { - char b[BDEVNAME_SIZE]; - - printk(KERN_WARNING - "JBD: barrier-based sync failed on %s - " - "disabling barriers\n", - bdevname(journal->j_dev, b)); - spin_lock(&journal->j_state_lock); - journal->j_flags &= ~JFS_BARRIER; - spin_unlock(&journal->j_state_lock); - - /* And try again, without the barrier */ - set_buffer_uptodate(bh); - set_buffer_dirty(bh); - ret = sync_dirty_buffer(bh); - } - } else { + if (journal->j_flags & JFS_BARRIER) + ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_FLUSH_FUA); + else ret = sync_dirty_buffer(bh); - } put_bh(bh); /* One for getblk() */ journal_put_journal_head(descriptor); @@ -318,7 +294,7 @@ void journal_commit_transaction(journal_t *journal) int first_tag = 0; int tag_flag; int i; - int write_op = WRITE; + int write_op = WRITE_SYNC; /* * First job: lock down the current transaction and wait for diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 5247e7f..6571a05 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -532,8 +532,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal) */ if ((journal->j_fs_dev != journal->j_dev) && (journal->j_flags & JBD2_BARRIER)) - blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL, - BLKDEV_IFL_WAIT); + blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); if (!(journal->j_flags & JBD2_ABORT)) jbd2_journal_update_superblock(journal, 1); return 0; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 7c068c1..bc6be8b 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -134,25 +134,11 @@ static int journal_submit_commit_record(journal_t *journal, if (journal->j_flags & JBD2_BARRIER && !JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { - ret = submit_bh(WRITE_SYNC_PLUG | WRITE_BARRIER, bh); - if (ret == -EOPNOTSUPP) { - printk(KERN_WARNING - "JBD2: Disabling barriers on %s, " - "not supported by device\n", journal->j_devname); - write_lock(&journal->j_state_lock); - journal->j_flags &= ~JBD2_BARRIER; - write_unlock(&journal->j_state_lock); - - /* And try again, without the barrier */ - lock_buffer(bh); - set_buffer_uptodate(bh); - clear_buffer_dirty(bh); - ret = submit_bh(WRITE_SYNC_PLUG, bh); - } - } else { + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) + ret = submit_bh(WRITE_SYNC_PLUG | WRITE_FLUSH_FUA, bh); + else ret = submit_bh(WRITE_SYNC_PLUG, bh); - } + *cbh = bh; return ret; } @@ -166,29 +152,8 @@ static int journal_wait_on_commit_record(journal_t *journal, { int ret = 0; -retry: clear_buffer_dirty(bh); wait_on_buffer(bh); - if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) { - printk(KERN_WARNING - "JBD2: %s: disabling barries on %s - not supported " - "by device\n", __func__, journal->j_devname); - write_lock(&journal->j_state_lock); - journal->j_flags &= ~JBD2_BARRIER; - write_unlock(&journal->j_state_lock); - - lock_buffer(bh); - clear_buffer_dirty(bh); - set_buffer_uptodate(bh); - bh->b_end_io = journal_end_buffer_io_sync; - - ret = submit_bh(WRITE_SYNC_PLUG, bh); - if (ret) { - unlock_buffer(bh); - return ret; - } - goto retry; - } if (unlikely(!buffer_uptodate(bh))) ret = -EIO; @@ -360,7 +325,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) int tag_bytes = 
journal_tag_bytes(journal); struct buffer_head *cbh = NULL; /* For transactional checksums */ __u32 crc32_sum = ~0; - int write_op = WRITE; + int write_op = WRITE_SYNC; /* * First job: lock down the current transaction and wait for @@ -701,6 +666,16 @@ start_journal_io: } } + err = journal_finish_inode_data_buffers(journal, commit_transaction); + if (err) { + printk(KERN_WARNING + "JBD2: Detected IO errors while flushing file data " + "on %s\n", journal->j_devname); + if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR) + jbd2_journal_abort(journal, err); + err = 0; + } + /* * If the journal is not located on the file system device, * then we must flush the file system device before we issue @@ -709,8 +684,7 @@ start_journal_io: if (commit_transaction->t_flushed_data_blocks && (journal->j_fs_dev != journal->j_dev) && (journal->j_flags & JBD2_BARRIER)) - blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL, - BLKDEV_IFL_WAIT); + blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); /* Done it all: now write the commit record asynchronously. */ if (JBD2_HAS_INCOMPAT_FEATURE(journal, @@ -719,19 +693,6 @@ start_journal_io: &cbh, crc32_sum); if (err) __jbd2_journal_abort_hard(journal); - if (journal->j_flags & JBD2_BARRIER) - blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL, - BLKDEV_IFL_WAIT); - } - - err = journal_finish_inode_data_buffers(journal, commit_transaction); - if (err) { - printk(KERN_WARNING - "JBD2: Detected IO errors while flushing file data " - "on %s\n", journal->j_devname); - if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR) - jbd2_journal_abort(journal, err); - err = 0; } /* Lo and behold: we have just managed to send a transaction to @@ -845,6 +806,11 @@ wait_for_iobuf: } if (!err && !is_journal_aborted(journal)) err = journal_wait_on_commit_record(journal, cbh); + if (JBD2_HAS_INCOMPAT_FEATURE(journal, + JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) && + journal->j_flags & JBD2_BARRIER) { + blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL); + } if (err) jbd2_journal_abort(journal, err); diff --git a/fs/minix/namei.c b/fs/minix/namei.c index e20ee85..f3f3578 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -115,7 +115,7 @@ static int minix_mkdir(struct inode * dir, struct dentry *dentry, int mode) inode_inc_link_count(dir); - inode = minix_new_inode(dir, mode, &err); + inode = minix_new_inode(dir, S_IFDIR | mode, &err); if (!inode) goto out_dir; diff --git a/fs/namespace.c b/fs/namespace.c index de402eb..a72eaab 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1484,13 +1484,30 @@ out_unlock: } /* + * Sanity check the flags to change_mnt_propagation. + */ + +static int flags_to_propagation_type(int flags) +{ + int type = flags & ~MS_REC; + + /* Fail if any non-propagation flags are set */ + if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) + return 0; + /* Only one propagation flag should be set */ + if (!is_power_of_2(type)) + return 0; + return type; +} + +/* * recursively change the type of the mountpoint. 
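
In flags_to_propagation_type() above, the is_power_of_2() test is what enforces "exactly one propagation flag": a zero value (no flag set) fails it, and two or more flags leave more than one bit set. A standalone sketch of the same check; the MS_* values are copied from the era's <linux/fs.h> and the demo itself is illustrative:

  #include <stdbool.h>
  #include <stdio.h>

  #define MS_REC        16384      /* 1 << 14 */
  #define MS_UNBINDABLE (1 << 17)
  #define MS_PRIVATE    (1 << 18)
  #define MS_SLAVE      (1 << 19)
  #define MS_SHARED     (1 << 20)

  static bool is_power_of_2(unsigned long n)
  {
          return n != 0 && (n & (n - 1)) == 0;   /* exactly one bit set */
  }

  static int flags_to_propagation_type(int flags)
  {
          int type = flags & ~MS_REC;

          /* fail if any non-propagation flags are set */
          if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
                  return 0;
          /* only one propagation flag should be set */
          if (!is_power_of_2(type))
                  return 0;
          return type;
  }

  int main(void)
  {
          printf("%#x\n", flags_to_propagation_type(MS_SHARED | MS_REC));   /* 0x100000 */
          printf("%#x\n", flags_to_propagation_type(MS_SHARED | MS_SLAVE)); /* 0: two flags */
          printf("%#x\n", flags_to_propagation_type(MS_REC));               /* 0: no flag */
          return 0;
  }
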
*/ static int do_change_type(struct path *path, int flag) { struct vfsmount *m, *mnt = path->mnt; int recurse = flag & MS_REC; - int type = flag & ~MS_REC; + int type; int err = 0; if (!capable(CAP_SYS_ADMIN)) @@ -1499,6 +1516,10 @@ static int do_change_type(struct path *path, int flag) if (path->dentry != path->mnt->mnt_root) return -EINVAL; + type = flags_to_propagation_type(flag); + if (!type) + return -EINVAL; + down_write(&namespace_sem); if (type == MS_SHARED) { err = invent_group_ids(mnt, recurse); diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 6c2aad4..f7e13db 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -63,6 +63,7 @@ config NFS_V3_ACL config NFS_V4 bool "NFS client support for NFS version 4" depends on NFS_FS + select SUNRPC_GSS help This option enables support for version 4 of the NFS protocol (RFC 3530) in the kernel's NFS client. diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 4e7df2a..e734072 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -275,7 +275,7 @@ static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1, sin1->sin6_scope_id != sin2->sin6_scope_id) return 0; - return ipv6_addr_equal(&sin1->sin6_addr, &sin1->sin6_addr); + return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr); } #else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */ static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1, diff --git a/fs/nfs/file.c b/fs/nfs/file.c index eb51bd6..05bf3c0 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -723,10 +723,6 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl) default: BUG(); } - if (res < 0) - dprintk(KERN_WARNING "%s: VFS is out of sync with lock manager" - " - error %d!\n", - __func__, res); return res; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ec3966e..f4cbf0c 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -431,7 +431,15 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf) goto out_err; error = server->nfs_client->rpc_ops->statfs(server, fh, &res); + if (unlikely(error == -ESTALE)) { + struct dentry *pd_dentry; + pd_dentry = dget_parent(dentry); + if (pd_dentry != NULL) { + nfs_zap_caches(pd_dentry->d_inode); + dput(pd_dentry); + } + } nfs_free_fattr(res.fattr); if (error < 0) goto out_err; diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 95932f5..4264377 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -69,6 +69,7 @@ config NFSD_V4 depends on NFSD && PROC_FS && EXPERIMENTAL select NFSD_V3 select FS_POSIX_ACL + select SUNRPC_GSS help This option enables support in your system's NFS server for version 4 of the NFS protocol (RFC 3530). diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3dfef06..cf0d2ff 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -440,7 +440,7 @@ test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) { static int nfs4_access_to_omode(u32 access) { - switch (access) { + switch (access & NFS4_SHARE_ACCESS_BOTH) { case NFS4_SHARE_ACCESS_READ: return O_RDONLY; case NFS4_SHARE_ACCESS_WRITE: diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 9222633..faa5078 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -178,17 +178,9 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) retry: set_buffer_dirty(nilfs->ns_sbh[0]); - if (nilfs_test_opt(sbi, BARRIER)) { err = __sync_dirty_buffer(nilfs->ns_sbh[0], - WRITE_SYNC | WRITE_BARRIER); - if (err == -EOPNOTSUPP) { - nilfs_warning(sbi->s_super, __func__, - "barrier-based sync failed. 
" - "disabling barriers\n"); - nilfs_clear_opt(sbi, BARRIER); - goto retry; - } + WRITE_SYNC | WRITE_FLUSH_FUA); } else { err = sync_dirty_buffer(nilfs->ns_sbh[0]); } diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 4317f17..d277151 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -446,6 +446,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) nilfs_mdt_destroy(nilfs->ns_cpfile); nilfs_mdt_destroy(nilfs->ns_sufile); nilfs_mdt_destroy(nilfs->ns_dat); + nilfs_mdt_destroy(nilfs->ns_gc_dat); failed: nilfs_clear_recovery_info(&ri); @@ -774,9 +775,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, ret = blkdev_issue_discard(nilfs->ns_bdev, start * sects_per_block, nblocks * sects_per_block, - GFP_NOFS, - BLKDEV_IFL_WAIT | - BLKDEV_IFL_BARRIER); + GFP_NOFS, 0); if (ret < 0) return ret; nblocks = 0; @@ -786,8 +785,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, ret = blkdev_issue_discard(nilfs->ns_bdev, start * sects_per_block, nblocks * sects_per_block, - GFP_NOFS, - BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); + GFP_NOFS, 0); return ret; } diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 215e12c..592fae5 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -6672,7 +6672,7 @@ int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end, last_page_bytes = PAGE_ALIGN(end); index = start >> PAGE_CACHE_SHIFT; do { - pages[numpages] = grab_cache_page(mapping, index); + pages[numpages] = find_or_create_page(mapping, index, GFP_NOFS); if (!pages[numpages]) { ret = -ENOMEM; mlog_errno(ret); diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c index ec6d123..c7ee03c 100644 --- a/fs/ocfs2/blockcheck.c +++ b/fs/ocfs2/blockcheck.c @@ -439,7 +439,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize, ocfs2_blockcheck_inc_failure(stats); mlog(ML_ERROR, - "CRC32 failed: stored: %u, computed %u. Applying ECC.\n", + "CRC32 failed: stored: 0x%x, computed 0x%x. 
Applying ECC.\n", (unsigned int)check.bc_crc32e, (unsigned int)crc); /* Ok, try ECC fixups */ @@ -453,7 +453,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize, goto out; } - mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n", + mlog(ML_ERROR, "Fixed CRC32 failed: stored: 0x%x, computed 0x%x\n", (unsigned int)check.bc_crc32e, (unsigned int)crc); rc = -EIO; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 81296b4..77f05b8 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -36,6 +36,7 @@ #include #include #include +#include #define MLOG_MASK_PREFIX ML_INODE #include @@ -190,8 +191,16 @@ static int ocfs2_sync_file(struct file *file, int datasync) if (err) goto bail; - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) + if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) { + /* + * We still have to flush drive's caches to get data to the + * platter + */ + if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) + blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, + NULL); goto bail; + } journal = osb->journal->j_journal; err = jbd2_journal_force_commit(journal); @@ -774,7 +783,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT)); BUG_ON(abs_from & (inode->i_blkbits - 1)); - page = grab_cache_page(mapping, index); + page = find_or_create_page(mapping, index, GFP_NOFS); if (!page) { ret = -ENOMEM; mlog_errno(ret); @@ -2329,7 +2338,7 @@ out_dio: BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || - ((file->f_flags & O_DIRECT) && has_refcount)) { + ((file->f_flags & O_DIRECT) && !direct_io)) { ret = filemap_fdatawrite_range(file->f_mapping, pos, pos + count - 1); if (ret < 0) diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 0492464..eece3e0 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -488,7 +488,11 @@ static int ocfs2_read_locked_inode(struct inode *inode, OCFS2_BH_IGNORE_CACHE); } else { status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh); - if (!status) + /* + * If buffer is in jbd, then its checksum may not have been + * computed as yet. + */ + if (!status && !buffer_jbd(bh)) status = ocfs2_validate_inode_block(osb->sb, bh); } if (status < 0) { diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index af2b8fe..4c18f4a 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -74,9 +74,11 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, /* * Another node might have truncated while we were waiting on * cluster locks. + * We don't check size == 0 before the shift. This is borrowed + * from do_generic_file_read. */ - last_index = size >> PAGE_CACHE_SHIFT; - if (page->index > last_index) { + last_index = (size - 1) >> PAGE_CACHE_SHIFT; + if (unlikely(!size || page->index > last_index)) { ret = -EINVAL; goto out; } @@ -107,7 +109,7 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, * because the "write" would invalidate their data. 
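
This hunk and the len calculation in the hunk just below fix the same fencepost: when i_size is an exact multiple of the page size, size >> PAGE_CACHE_SHIFT names the first page past EOF and size & ~PAGE_CACHE_MASK yields length zero for what is really a full last page. Computing from (size - 1) fixes both, leaving size == 0 as the one case that needs an explicit check. The arithmetic for a 4K page, as a userspace sketch in which the PAGE_CACHE_* macros reduce to the constants below:

  #include <stdio.h>

  #define PAGE_SIZE  4096UL
  #define PAGE_SHIFT 12
  #define PAGE_MASK  (~(PAGE_SIZE - 1))

  int main(void)
  {
          unsigned long size = 2 * PAGE_SIZE;   /* i_size exactly page-aligned */

          /* old formulas: index 2 is past EOF, and len 0 is wrong */
          printf("old: last_index=%lu len=%lu\n",
                 size >> PAGE_SHIFT, size & ~PAGE_MASK);

          /* new formulas: last real page is 1, len is a full page */
          printf("new: last_index=%lu len=%lu\n",
                 (size - 1) >> PAGE_SHIFT, ((size - 1) & ~PAGE_MASK) + 1);
          return 0;
  }
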
*/ if (page->index == last_index) - len = size & ~PAGE_CACHE_MASK; + len = ((size - 1) & ~PAGE_CACHE_MASK) + 1; ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page, &fsdata, di_bh, page); diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index f171b51..a00dda2 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -472,32 +472,23 @@ leave: return status; } -static int ocfs2_mknod_locked(struct ocfs2_super *osb, - struct inode *dir, - struct inode *inode, - dev_t dev, - struct buffer_head **new_fe_bh, - struct buffer_head *parent_fe_bh, - handle_t *handle, - struct ocfs2_alloc_context *inode_ac) +static int __ocfs2_mknod_locked(struct inode *dir, + struct inode *inode, + dev_t dev, + struct buffer_head **new_fe_bh, + struct buffer_head *parent_fe_bh, + handle_t *handle, + struct ocfs2_alloc_context *inode_ac, + u64 fe_blkno, u64 suballoc_loc, u16 suballoc_bit) { int status = 0; + struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); struct ocfs2_dinode *fe = NULL; struct ocfs2_extent_list *fel; - u64 suballoc_loc, fe_blkno = 0; - u16 suballoc_bit; u16 feat; *new_fe_bh = NULL; - status = ocfs2_claim_new_inode(handle, dir, parent_fe_bh, - inode_ac, &suballoc_loc, - &suballoc_bit, &fe_blkno); - if (status < 0) { - mlog_errno(status); - goto leave; - } - /* populate as many fields early on as possible - many of * these are used by the support functions here and in * callers. */ @@ -591,6 +582,34 @@ leave: return status; } +static int ocfs2_mknod_locked(struct ocfs2_super *osb, + struct inode *dir, + struct inode *inode, + dev_t dev, + struct buffer_head **new_fe_bh, + struct buffer_head *parent_fe_bh, + handle_t *handle, + struct ocfs2_alloc_context *inode_ac) +{ + int status = 0; + u64 suballoc_loc, fe_blkno = 0; + u16 suballoc_bit; + + *new_fe_bh = NULL; + + status = ocfs2_claim_new_inode(handle, dir, parent_fe_bh, + inode_ac, &suballoc_loc, + &suballoc_bit, &fe_blkno); + if (status < 0) { + mlog_errno(status); + return status; + } + + return __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh, + parent_fe_bh, handle, inode_ac, + fe_blkno, suballoc_loc, suballoc_bit); +} + static int ocfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) @@ -1852,61 +1871,117 @@ bail: return status; } -static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, - struct inode **ret_orphan_dir, - u64 blkno, - char *name, - struct ocfs2_dir_lookup_result *lookup) +static int ocfs2_lookup_lock_orphan_dir(struct ocfs2_super *osb, + struct inode **ret_orphan_dir, + struct buffer_head **ret_orphan_dir_bh) { struct inode *orphan_dir_inode; struct buffer_head *orphan_dir_bh = NULL; - int status = 0; - - status = ocfs2_blkno_stringify(blkno, name); - if (status < 0) { - mlog_errno(status); - return status; - } + int ret = 0; orphan_dir_inode = ocfs2_get_system_file_inode(osb, ORPHAN_DIR_SYSTEM_INODE, osb->slot_num); if (!orphan_dir_inode) { - status = -ENOENT; - mlog_errno(status); - return status; + ret = -ENOENT; + mlog_errno(ret); + return ret; } mutex_lock(&orphan_dir_inode->i_mutex); - status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); - if (status < 0) { - mlog_errno(status); - goto leave; + ret = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); + if (ret < 0) { + mutex_unlock(&orphan_dir_inode->i_mutex); + iput(orphan_dir_inode); + + mlog_errno(ret); + return ret; } - status = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode, - orphan_dir_bh, name, - OCFS2_ORPHAN_NAMELEN, lookup); - if (status < 0) { - ocfs2_inode_unlock(orphan_dir_inode, 1); + *ret_orphan_dir = 
orphan_dir_inode; + *ret_orphan_dir_bh = orphan_dir_bh; - mlog_errno(status); - goto leave; + return 0; +} + +static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode, + struct buffer_head *orphan_dir_bh, + u64 blkno, + char *name, + struct ocfs2_dir_lookup_result *lookup) +{ + int ret; + struct ocfs2_super *osb = OCFS2_SB(orphan_dir_inode->i_sb); + + ret = ocfs2_blkno_stringify(blkno, name); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + + ret = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode, + orphan_dir_bh, name, + OCFS2_ORPHAN_NAMELEN, lookup); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + + return 0; +} + +/** + * ocfs2_prepare_orphan_dir() - Prepare an orphan directory for + * insertion of an orphan. + * @osb: ocfs2 file system + * @ret_orphan_dir: Orphan dir inode - returned locked! + * @blkno: Actual block number of the inode to be inserted into orphan dir. + * @lookup: dir lookup result, to be passed back into functions like + * ocfs2_orphan_add + * + * Returns zero on success and the ret_orphan_dir, name and lookup + * fields will be populated. + * + * Returns non-zero on failure. + */ +static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, + struct inode **ret_orphan_dir, + u64 blkno, + char *name, + struct ocfs2_dir_lookup_result *lookup) +{ + struct inode *orphan_dir_inode = NULL; + struct buffer_head *orphan_dir_bh = NULL; + int ret = 0; + + ret = ocfs2_lookup_lock_orphan_dir(osb, &orphan_dir_inode, + &orphan_dir_bh); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + + ret = __ocfs2_prepare_orphan_dir(orphan_dir_inode, orphan_dir_bh, + blkno, name, lookup); + if (ret < 0) { + mlog_errno(ret); + goto out; + } *ret_orphan_dir = orphan_dir_inode; -leave: - if (status) { +out: + brelse(orphan_dir_bh); + + if (ret) { + ocfs2_inode_unlock(orphan_dir_inode, 1); mutex_unlock(&orphan_dir_inode->i_mutex); iput(orphan_dir_inode); } - brelse(orphan_dir_bh); - - mlog_exit(status); + mlog_exit(ret); + return ret; } static int ocfs2_orphan_add(struct ocfs2_super *osb, @@ -2053,6 +2128,99 @@ leave: return status; } +/** + * ocfs2_prep_new_orphaned_file() - Prepare the orphan dir to receive a newly + * allocated file. This is different from the typical 'add to orphan dir' + * operation in that the inode does not yet exist. This is a problem because + * the orphan dir stringifies the inode block number to come up with its + * dirent. Obviously if the inode does not yet exist we have a chicken and egg + * problem. This function works around it by calling deeper into the orphan + * and suballoc code than other callers. Use this only when necessary. + * @dir: The directory which this inode will ultimately wind up under - not the + * orphan dir! + * @dir_bh: buffer_head of the @dir inode block + * @orphan_name: string of length (OCFS2_ORPHAN_NAMELEN + 1). Will be filled + * with the string to be used for the orphan dirent. Pass back to the orphan dir + * code. + * @ret_orphan_dir: orphan dir inode returned to be passed back into orphan + * dir code. + * @ret_di_blkno: block number where the new inode will be allocated. + * @orphan_insert: Dir insert context to be passed back into orphan dir code. + * @ret_inode_ac: Inode alloc context to be passed back to the allocator. + * + * Returns zero on success and the ret_orphan_dir, ret_di_blkno, orphan_name, + * orphan_insert and ret_inode_ac fields will be populated. + * + * Returns non-zero on failure. 
+ */ +static int ocfs2_prep_new_orphaned_file(struct inode *dir, + struct buffer_head *dir_bh, + char *orphan_name, + struct inode **ret_orphan_dir, + u64 *ret_di_blkno, + struct ocfs2_dir_lookup_result *orphan_insert, + struct ocfs2_alloc_context **ret_inode_ac) +{ + int ret; + u64 di_blkno; + struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); + struct inode *orphan_dir = NULL; + struct buffer_head *orphan_dir_bh = NULL; + struct ocfs2_alloc_context *inode_ac = NULL; + + ret = ocfs2_lookup_lock_orphan_dir(osb, &orphan_dir, &orphan_dir_bh); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + + /* reserve an inode spot */ + ret = ocfs2_reserve_new_inode(osb, &inode_ac); + if (ret < 0) { + if (ret != -ENOSPC) + mlog_errno(ret); + goto out; + } + + ret = ocfs2_find_new_inode_loc(dir, dir_bh, inode_ac, + &di_blkno); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = __ocfs2_prepare_orphan_dir(orphan_dir, orphan_dir_bh, + di_blkno, orphan_name, orphan_insert); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + +out: + if (ret == 0) { + *ret_orphan_dir = orphan_dir; + *ret_di_blkno = di_blkno; + *ret_inode_ac = inode_ac; + /* + * orphan_name and orphan_insert are already up to + * date via prepare_orphan_dir + */ + } else { + /* Unroll reserve_new_inode* */ + if (inode_ac) + ocfs2_free_alloc_context(inode_ac); + + /* Unroll orphan dir locking */ + mutex_unlock(&orphan_dir->i_mutex); + ocfs2_inode_unlock(orphan_dir, 1); + iput(orphan_dir); + } + + brelse(orphan_dir_bh); + + return ret; +} + int ocfs2_create_inode_in_orphan(struct inode *dir, int mode, struct inode **new_inode) @@ -2068,6 +2236,8 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, struct buffer_head *new_di_bh = NULL; struct ocfs2_alloc_context *inode_ac = NULL; struct ocfs2_dir_lookup_result orphan_insert = { NULL, }; + u64 uninitialized_var(di_blkno), suballoc_loc; + u16 suballoc_bit; status = ocfs2_inode_lock(dir, &parent_di_bh, 1); if (status < 0) { @@ -2076,20 +2246,9 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, return status; } - /* - * We give the orphan dir the root blkno to fake an orphan name, - * and allocate enough space for our insertion. - */ - status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, - osb->root_blkno, - orphan_name, &orphan_insert); - if (status < 0) { - mlog_errno(status); - goto leave; - } - - /* reserve an inode spot */ - status = ocfs2_reserve_new_inode(osb, &inode_ac); + status = ocfs2_prep_new_orphaned_file(dir, parent_di_bh, + orphan_name, &orphan_dir, + &di_blkno, &orphan_insert, &inode_ac); if (status < 0) { if (status != -ENOSPC) mlog_errno(status); @@ -2116,17 +2275,20 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, goto leave; did_quota_inode = 1; - inode->i_nlink = 0; - /* do the real work now. */ - status = ocfs2_mknod_locked(osb, dir, inode, - 0, &new_di_bh, parent_di_bh, handle, - inode_ac); + status = ocfs2_claim_new_inode_at_loc(handle, dir, inode_ac, + &suballoc_loc, + &suballoc_bit, di_blkno); + if (status < 0) { + mlog_errno(status); + goto leave; + } - status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, orphan_name); + inode->i_nlink = 0; + /* do the real work now. 
*/ + status = __ocfs2_mknod_locked(dir, inode, + 0, &new_di_bh, parent_di_bh, handle, + inode_ac, di_blkno, suballoc_loc, + suballoc_bit); if (status < 0) { mlog_errno(status); goto leave; diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 73a11cc..0afeda831 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -2960,7 +2960,7 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, if (map_end & (PAGE_CACHE_SIZE - 1)) to = map_end & (PAGE_CACHE_SIZE - 1); - page = grab_cache_page(mapping, page_index); + page = find_or_create_page(mapping, page_index, GFP_NOFS); /* * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page @@ -3179,7 +3179,8 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb, if (map_end > end) map_end = end; - page = grab_cache_page(context->inode->i_mapping, page_index); + page = find_or_create_page(context->inode->i_mapping, + page_index, GFP_NOFS); BUG_ON(!page); wait_on_page_writeback(page); diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index a8e6a95..8a286f5 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -57,11 +57,28 @@ struct ocfs2_suballoc_result { u64 sr_bg_blkno; /* The bg we allocated from. Set to 0 when a block group is contiguous. */ + u64 sr_bg_stable_blkno; /* + * Doesn't change, always + * set to target block + * group descriptor + * block. + */ u64 sr_blkno; /* The first allocated block */ unsigned int sr_bit_offset; /* The bit in the bg */ unsigned int sr_bits; /* How many bits we claimed */ }; +static u64 ocfs2_group_from_res(struct ocfs2_suballoc_result *res) +{ + if (res->sr_blkno == 0) + return 0; + + if (res->sr_bg_blkno) + return res->sr_bg_blkno; + + return ocfs2_which_suballoc_group(res->sr_blkno, res->sr_bit_offset); +} + static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); @@ -138,6 +155,10 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) brelse(ac->ac_bh); ac->ac_bh = NULL; ac->ac_resv = NULL; + if (ac->ac_find_loc_priv) { + kfree(ac->ac_find_loc_priv); + ac->ac_find_loc_priv = NULL; + } } void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) @@ -1678,6 +1699,15 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, if (!ret) ocfs2_bg_discontig_fix_result(ac, gd, res); + /* + * sr_bg_blkno might have been changed by + * ocfs2_bg_discontig_fix_result + */ + res->sr_bg_stable_blkno = group_bh->b_blocknr; + + if (ac->ac_find_loc_only) + goto out_loc_only; + ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh, res->sr_bits, le16_to_cpu(gd->bg_chain)); @@ -1691,6 +1721,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, if (ret < 0) mlog_errno(ret); +out_loc_only: *bits_left = le16_to_cpu(gd->bg_free_bits_count); out: @@ -1708,7 +1739,6 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, { int status; u16 chain; - u32 tmp_used; u64 next_group; struct inode *alloc_inode = ac->ac_inode; struct buffer_head *group_bh = NULL; @@ -1770,6 +1800,11 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, if (!status) ocfs2_bg_discontig_fix_result(ac, bg, res); + /* + * sr_bg_blkno might have been changed by + * ocfs2_bg_discontig_fix_result + */ + res->sr_bg_stable_blkno = group_bh->b_blocknr; /* * Keep track of previous block descriptor read. 
When @@ -1796,22 +1831,17 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, } } - /* Ok, claim our bits now: set the info on dinode, chainlist - * and then the group */ - status = ocfs2_journal_access_di(handle, - INODE_CACHE(alloc_inode), - ac->ac_bh, - OCFS2_JOURNAL_ACCESS_WRITE); - if (status < 0) { + if (ac->ac_find_loc_only) + goto out_loc_only; + + status = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, + ac->ac_bh, res->sr_bits, + chain); + if (status) { mlog_errno(status); goto bail; } - tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); - fe->id1.bitmap1.i_used = cpu_to_le32(res->sr_bits + tmp_used); - le32_add_cpu(&cl->cl_recs[chain].c_free, -res->sr_bits); - ocfs2_journal_dirty(handle, ac->ac_bh); - status = ocfs2_block_group_set_bits(handle, alloc_inode, bg, @@ -1826,6 +1856,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits, (unsigned long long)le64_to_cpu(fe->i_blkno)); +out_loc_only: *bits_left = le16_to_cpu(bg->bg_free_bits_count); bail: brelse(group_bh); @@ -1845,6 +1876,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, int status; u16 victim, i; u16 bits_left = 0; + u64 hint = ac->ac_last_group; struct ocfs2_chain_list *cl; struct ocfs2_dinode *fe; @@ -1872,7 +1904,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, goto bail; } - res->sr_bg_blkno = ac->ac_last_group; + res->sr_bg_blkno = hint; if (res->sr_bg_blkno) { /* Attempt to short-circuit the usual search mechanism * by jumping straight to the most recently used @@ -1896,8 +1928,10 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, res, &bits_left); - if (!status) + if (!status) { + hint = ocfs2_group_from_res(res); goto set_hint; + } if (status < 0 && status != -ENOSPC) { mlog_errno(status); goto bail; @@ -1920,8 +1954,10 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, ac->ac_chain = i; status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, res, &bits_left); - if (!status) + if (!status) { + hint = ocfs2_group_from_res(res); break; + } if (status < 0 && status != -ENOSPC) { mlog_errno(status); goto bail; @@ -1936,7 +1972,7 @@ set_hint: if (bits_left < min_bits) ac->ac_last_group = 0; else - ac->ac_last_group = res->sr_bg_blkno; + ac->ac_last_group = hint; } bail: @@ -2016,6 +2052,136 @@ static inline void ocfs2_save_inode_ac_group(struct inode *dir, OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot; } +int ocfs2_find_new_inode_loc(struct inode *dir, + struct buffer_head *parent_fe_bh, + struct ocfs2_alloc_context *ac, + u64 *fe_blkno) +{ + int ret; + handle_t *handle = NULL; + struct ocfs2_suballoc_result *res; + + BUG_ON(!ac); + BUG_ON(ac->ac_bits_given != 0); + BUG_ON(ac->ac_bits_wanted != 1); + BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE); + + res = kzalloc(sizeof(*res), GFP_NOFS); + if (res == NULL) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac); + + /* + * The handle started here is for chain relink. Alternatively, + * we could just disable relink for these calls. + */ + handle = ocfs2_start_trans(OCFS2_SB(dir->i_sb), OCFS2_SUBALLOC_ALLOC); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + handle = NULL; + mlog_errno(ret); + goto out; + } + + /* + * This will instruct ocfs2_claim_suballoc_bits and + * ocfs2_search_one_group to search but save actual allocation + * for later. 
+ */ + ac->ac_find_loc_only = 1; + + ret = ocfs2_claim_suballoc_bits(ac, handle, 1, 1, res); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + ac->ac_find_loc_priv = res; + *fe_blkno = res->sr_blkno; + +out: + if (handle) + ocfs2_commit_trans(OCFS2_SB(dir->i_sb), handle); + + if (ret) + kfree(res); + + return ret; +} + +int ocfs2_claim_new_inode_at_loc(handle_t *handle, + struct inode *dir, + struct ocfs2_alloc_context *ac, + u64 *suballoc_loc, + u16 *suballoc_bit, + u64 di_blkno) +{ + int ret; + u16 chain; + struct ocfs2_suballoc_result *res = ac->ac_find_loc_priv; + struct buffer_head *bg_bh = NULL; + struct ocfs2_group_desc *bg; + struct ocfs2_dinode *di = (struct ocfs2_dinode *) ac->ac_bh->b_data; + + /* + * Since di_blkno is being passed back in, we check for any + * inconsistencies which may have happened between + * calls. These are code bugs as di_blkno is not expected to + * change once returned from ocfs2_find_new_inode_loc() + */ + BUG_ON(res->sr_blkno != di_blkno); + + ret = ocfs2_read_group_descriptor(ac->ac_inode, di, + res->sr_bg_stable_blkno, &bg_bh); + if (ret) { + mlog_errno(ret); + goto out; + } + + bg = (struct ocfs2_group_desc *) bg_bh->b_data; + chain = le16_to_cpu(bg->bg_chain); + + ret = ocfs2_alloc_dinode_update_counts(ac->ac_inode, handle, + ac->ac_bh, res->sr_bits, + chain); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_block_group_set_bits(handle, + ac->ac_inode, + bg, + bg_bh, + res->sr_bit_offset, + res->sr_bits); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits, + (unsigned long long)di_blkno); + + atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs); + + BUG_ON(res->sr_bits != 1); + + *suballoc_loc = res->sr_bg_blkno; + *suballoc_bit = res->sr_bit_offset; + ac->ac_bits_given++; + ocfs2_save_inode_ac_group(dir, ac); + +out: + brelse(bg_bh); + + return ret; +} + int ocfs2_claim_new_inode(handle_t *handle, struct inode *dir, struct buffer_head *parent_fe_bh, @@ -2567,7 +2733,8 @@ out: * suballoc_bit. */ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, - u16 *suballoc_slot, u16 *suballoc_bit) + u16 *suballoc_slot, u64 *group_blkno, + u16 *suballoc_bit) { int status; struct buffer_head *inode_bh = NULL; @@ -2604,6 +2771,8 @@ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, *suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot); if (suballoc_bit) *suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit); + if (group_blkno) + *group_blkno = le64_to_cpu(inode_fe->i_suballoc_loc); bail: brelse(inode_bh); @@ -2621,7 +2790,8 @@ bail: */ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, struct inode *suballoc, - struct buffer_head *alloc_bh, u64 blkno, + struct buffer_head *alloc_bh, + u64 group_blkno, u64 blkno, u16 bit, int *res) { struct ocfs2_dinode *alloc_di; @@ -2642,10 +2812,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, goto bail; } - if (alloc_di->i_suballoc_loc) - bg_blkno = le64_to_cpu(alloc_di->i_suballoc_loc); - else - bg_blkno = ocfs2_which_suballoc_group(blkno, bit); + bg_blkno = group_blkno ? 
group_blkno : + ocfs2_which_suballoc_group(blkno, bit); status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno, &group_bh); if (status < 0) { @@ -2680,6 +2848,7 @@ bail: int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) { int status; + u64 group_blkno = 0; u16 suballoc_bit = 0, suballoc_slot = 0; struct inode *inode_alloc_inode; struct buffer_head *alloc_bh = NULL; @@ -2687,7 +2856,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) mlog_entry("blkno: %llu", (unsigned long long)blkno); status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot, - &suballoc_bit); + &group_blkno, &suballoc_bit); if (status < 0) { mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status); goto bail; @@ -2715,7 +2884,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) } status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh, - blkno, suballoc_bit, res); + group_blkno, blkno, suballoc_bit, res); if (status < 0) mlog(ML_ERROR, "test suballoc bit failed %d\n", status); diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index a017dd3..b8afabf 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -56,6 +56,9 @@ struct ocfs2_alloc_context { u64 ac_max_block; /* Highest block number to allocate. 0 is is the same as ~0 - unlimited */ + int ac_find_loc_only; /* hack for reflink operation ordering */ + struct ocfs2_suballoc_result *ac_find_loc_priv; /* */ + struct ocfs2_alloc_reservation *ac_resv; }; @@ -197,4 +200,22 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et, struct ocfs2_alloc_context **meta_ac); int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res); + + + +/* + * The following two interfaces are for ocfs2_create_inode_in_orphan(). 
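
The two declarations that follow split inode allocation into "pick a location" and "claim it" so that the inode's block number is known before any bits are committed; ocfs2_create_inode_in_orphan() needs that number up front to name the orphan dirent. A hedged sketch of the intended call order, with locking, error handling and the orphan-dir insertion elided (the real sequence lives in ocfs2_prep_new_orphaned_file() and ocfs2_create_inode_in_orphan() earlier in this series):

  /* Sketch only; not compilable outside fs/ocfs2. */
  struct ocfs2_alloc_context *inode_ac;
  u64 di_blkno, suballoc_loc;
  u16 suballoc_bit;

  ocfs2_reserve_new_inode(osb, &inode_ac);

  /* Phase 1, outside the main transaction: pick the block so the
   * orphan dirent can be named after it. ac_find_loc_only makes the
   * search record its result in ac_find_loc_priv without claiming. */
  ocfs2_find_new_inode_loc(dir, dir_bh, inode_ac, &di_blkno);
  /* ... stringify di_blkno and insert it into the orphan dir ... */

  /* Phase 2, inside the real transaction: claim exactly that block. */
  ocfs2_claim_new_inode_at_loc(handle, dir, inode_ac,
                               &suballoc_loc, &suballoc_bit, di_blkno);
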
+ */ +int ocfs2_find_new_inode_loc(struct inode *dir, + struct buffer_head *parent_fe_bh, + struct ocfs2_alloc_context *ac, + u64 *fe_blkno); + +int ocfs2_claim_new_inode_at_loc(handle_t *handle, + struct inode *dir, + struct ocfs2_alloc_context *ac, + u64 *suballoc_loc, + u16 *suballoc_bit, + u64 di_blkno); + #endif /* _CHAINALLOC_H_ */ diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 79fbf3f..6dfbee0 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -352,6 +352,7 @@ static void part_release(struct device *dev) { struct hd_struct *p = dev_to_part(dev); free_part_stats(p); + free_part_info(p); kfree(p); } @@ -401,7 +402,8 @@ static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, whole_disk_show, NULL); struct hd_struct *add_partition(struct gendisk *disk, int partno, - sector_t start, sector_t len, int flags) + sector_t start, sector_t len, int flags, + struct partition_meta_info *info) { struct hd_struct *p; dev_t devt = MKDEV(0, 0); @@ -438,6 +440,14 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, p->partno = partno; p->policy = get_disk_ro(disk); + if (info) { + struct partition_meta_info *pinfo = alloc_part_info(disk); + if (!pinfo) + goto out_free_stats; + memcpy(pinfo, info, sizeof(*info)); + p->info = pinfo; + } + dname = dev_name(ddev); if (isdigit(dname[strlen(dname) - 1])) dev_set_name(pdev, "%sp%d", dname, partno); @@ -451,7 +461,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, err = blk_alloc_devt(p, &devt); if (err) - goto out_free_stats; + goto out_free_info; pdev->devt = devt; /* delay uevent until 'holders' subdir is created */ @@ -481,6 +491,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, return p; +out_free_info: + free_part_info(p); out_free_stats: free_part_stats(p); out_free: @@ -642,6 +654,7 @@ rescan: /* add partitions */ for (p = 1; p < state->limit; p++) { sector_t size, from; + struct partition_meta_info *info = NULL; size = state->parts[p].size; if (!size) @@ -675,8 +688,12 @@ rescan: size = get_capacity(disk) - from; } } + + if (state->parts[p].has_info) + info = &state->parts[p].info; part = add_partition(disk, p, from, size, - state->parts[p].flags); + state->parts[p].flags, + &state->parts[p].info); if (IS_ERR(part)) { printk(KERN_ERR " %s: p%d could not be added: %ld\n", disk->disk_name, p, -PTR_ERR(part)); diff --git a/fs/partitions/check.h b/fs/partitions/check.h index 8e4e103..d68bf4d 100644 --- a/fs/partitions/check.h +++ b/fs/partitions/check.h @@ -1,5 +1,6 @@ #include #include +#include /* * add_gd_partition adds a partitions details to the devices partition @@ -12,6 +13,8 @@ struct parsed_partitions { sector_t from; sector_t size; int flags; + bool has_info; + struct partition_meta_info info; } parts[DISK_MAX_PARTS]; int next; int limit; diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c index dbb44d4..ac0ccb5 100644 --- a/fs/partitions/efi.c +++ b/fs/partitions/efi.c @@ -94,6 +94,7 @@ * ************************************************************/ #include +#include #include #include #include "check.h" @@ -604,6 +605,7 @@ int efi_partition(struct parsed_partitions *state) gpt_entry *ptes = NULL; u32 i; unsigned ssz = bdev_logical_block_size(state->bdev) / 512; + u8 unparsed_guid[37]; if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) { kfree(gpt); @@ -614,6 +616,9 @@ int efi_partition(struct parsed_partitions *state) pr_debug("GUID Partition Table is valid! 
Yea!\n"); for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) { + struct partition_meta_info *info; + unsigned label_count = 0; + unsigned label_max; u64 start = le64_to_cpu(ptes[i].starting_lba); u64 size = le64_to_cpu(ptes[i].ending_lba) - le64_to_cpu(ptes[i].starting_lba) + 1ULL; @@ -627,6 +632,26 @@ int efi_partition(struct parsed_partitions *state) if (!efi_guidcmp(ptes[i].partition_type_guid, PARTITION_LINUX_RAID_GUID)) state->parts[i + 1].flags = ADDPART_FLAG_RAID; + + info = &state->parts[i + 1].info; + /* Instead of doing a manual swap to big endian, reuse the + * common ASCII hex format as the interim. + */ + efi_guid_unparse(&ptes[i].unique_partition_guid, unparsed_guid); + part_pack_uuid(unparsed_guid, info->uuid); + + /* Naively convert UTF16-LE to 7 bits. */ + label_max = min(sizeof(info->volname) - 1, + sizeof(ptes[i].partition_name)); + info->volname[label_max] = 0; + while (label_count < label_max) { + u8 c = ptes[i].partition_name[label_count] & 0xff; + if (c && !isprint(c)) + c = '!'; + info->volname[label_count] = c; + label_count++; + } + state->parts[i + 1].has_info = true; } kfree(ptes); kfree(gpt); diff --git a/fs/proc/page.c b/fs/proc/page.c index 180cf5a..3b8b456 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -146,7 +146,7 @@ u64 stable_page_flags(struct page *page) u |= kpf_copy_bit(k, KPF_HWPOISON, PG_hwpoison); #endif -#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR +#ifdef CONFIG_ARCH_USES_PG_UNCACHED u |= kpf_copy_bit(k, KPF_UNCACHED, PG_uncached); #endif diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 439fc1f..271afc4 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -224,7 +224,8 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) /* We don't show the stack guard page in /proc/maps */ start = vma->vm_start; if (vma->vm_flags & VM_GROWSDOWN) - start += PAGE_SIZE; + if (!vma_stack_continue(vma->vm_prev, vma->vm_start)) + start += PAGE_SIZE; seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", start, diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 6846371..91f080c 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -152,8 +152,7 @@ static int reiserfs_sync_file(struct file *filp, int datasync) barrier_done = reiserfs_commit_for_inode(inode); reiserfs_write_unlock(inode->i_sb); if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb)) - blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, - BLKDEV_IFL_WAIT); + blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); if (barrier_done < 0) return barrier_done; return (err < 0) ? 
-EIO : 0; diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 812e2c0..076c8b1 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -138,13 +138,6 @@ static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) return 0; } -static void disable_barrier(struct super_block *s) -{ - REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_BARRIER_FLUSH); - printk("reiserfs: disabling flush barriers on %s\n", - reiserfs_bdevname(s)); -} - static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block *sb) { @@ -677,30 +670,6 @@ static void submit_ordered_buffer(struct buffer_head *bh) submit_bh(WRITE, bh); } -static int submit_barrier_buffer(struct buffer_head *bh) -{ - get_bh(bh); - bh->b_end_io = reiserfs_end_ordered_io; - clear_buffer_dirty(bh); - if (!buffer_uptodate(bh)) - BUG(); - return submit_bh(WRITE_BARRIER, bh); -} - -static void check_barrier_completion(struct super_block *s, - struct buffer_head *bh) -{ - if (buffer_eopnotsupp(bh)) { - clear_buffer_eopnotsupp(bh); - disable_barrier(s); - set_buffer_uptodate(bh); - set_buffer_dirty(bh); - reiserfs_write_unlock(s); - sync_dirty_buffer(bh); - reiserfs_write_lock(s); - } -} - #define CHUNK_SIZE 32 struct buffer_chunk { struct buffer_head *bh[CHUNK_SIZE]; @@ -1009,7 +978,6 @@ static int flush_commit_list(struct super_block *s, struct buffer_head *tbh = NULL; unsigned int trans_id = jl->j_trans_id; struct reiserfs_journal *journal = SB_JOURNAL(s); - int barrier = 0; int retval = 0; int write_len; @@ -1094,24 +1062,6 @@ static int flush_commit_list(struct super_block *s, } atomic_dec(&journal->j_async_throttle); - /* We're skipping the commit if there's an error */ - if (retval || reiserfs_is_journal_aborted(journal)) - barrier = 0; - - /* wait on everything written so far before writing the commit - * if we are in barrier mode, send the commit down now - */ - barrier = reiserfs_barrier_flush(s); - if (barrier) { - int ret; - lock_buffer(jl->j_commit_bh); - ret = submit_barrier_buffer(jl->j_commit_bh); - if (ret == -EOPNOTSUPP) { - set_buffer_uptodate(jl->j_commit_bh); - disable_barrier(s); - barrier = 0; - } - } for (i = 0; i < (jl->j_len + 1); i++) { bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s); @@ -1143,27 +1093,22 @@ static int flush_commit_list(struct super_block *s, BUG_ON(atomic_read(&(jl->j_commit_left)) != 1); - if (!barrier) { - /* If there was a write error in the journal - we can't commit - * this transaction - it will be invalid and, if successful, - * will just end up propagating the write error out to - * the file system. */ - if (likely(!retval && !reiserfs_is_journal_aborted (journal))) { - if (buffer_dirty(jl->j_commit_bh)) - BUG(); - mark_buffer_dirty(jl->j_commit_bh) ; - reiserfs_write_unlock(s); - sync_dirty_buffer(jl->j_commit_bh) ; - reiserfs_write_lock(s); - } - } else { + /* If there was a write error in the journal - we can't commit + * this transaction - it will be invalid and, if successful, + * will just end up propagating the write error out to + * the file system. 
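
The replacement idiom here, and again in the _update_journal_header_block() hunk below, is a single __sync_dirty_buffer() with WRITE_FLUSH_FUA; the old submit_barrier_buffer()/EOPNOTSUPP fallback is gone because the block layer now emulates flush and FUA for devices that lack them. Decomposed per the 2.6.37-era request-flag definitions (an assumption worth checking against include/linux/fs.h):

  /* Reconstructed expansion, not copied from this series: */
  #define WRITE_FLUSH_FUA (WRITE_SYNC | REQ_FLUSH | REQ_FUA)
  /*
   * REQ_FLUSH: drain the device's volatile write cache before this
   *            write, so previously completed journal blocks reach
   *            stable media first.
   * REQ_FUA:   force this write itself through the cache, so its
   *            completion implies the commit block is durable.
   */
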
*/ + if (likely(!retval && !reiserfs_is_journal_aborted (journal))) { + if (buffer_dirty(jl->j_commit_bh)) + BUG(); + mark_buffer_dirty(jl->j_commit_bh) ; reiserfs_write_unlock(s); - wait_on_buffer(jl->j_commit_bh); + if (reiserfs_barrier_flush(s)) + __sync_dirty_buffer(jl->j_commit_bh, WRITE_FLUSH_FUA); + else + sync_dirty_buffer(jl->j_commit_bh); reiserfs_write_lock(s); } - check_barrier_completion(s, jl->j_commit_bh); - /* If there was a write error in the journal - we can't commit this * transaction - it will be invalid and, if successful, will just end * up propagating the write error out to the filesystem. */ @@ -1319,26 +1264,15 @@ static int _update_journal_header_block(struct super_block *sb, jh->j_first_unflushed_offset = cpu_to_le32(offset); jh->j_mount_id = cpu_to_le32(journal->j_mount_id); - if (reiserfs_barrier_flush(sb)) { - int ret; - lock_buffer(journal->j_header_bh); - ret = submit_barrier_buffer(journal->j_header_bh); - if (ret == -EOPNOTSUPP) { - set_buffer_uptodate(journal->j_header_bh); - disable_barrier(sb); - goto sync; - } - reiserfs_write_unlock(sb); - wait_on_buffer(journal->j_header_bh); - reiserfs_write_lock(sb); - check_barrier_completion(sb, journal->j_header_bh); - } else { - sync: - set_buffer_dirty(journal->j_header_bh); - reiserfs_write_unlock(sb); + set_buffer_dirty(journal->j_header_bh); + reiserfs_write_unlock(sb); + + if (reiserfs_barrier_flush(sb)) + __sync_dirty_buffer(journal->j_header_bh, WRITE_FLUSH_FUA); + else sync_dirty_buffer(journal->j_header_bh); - reiserfs_write_lock(sb); - } + + reiserfs_write_lock(sb); if (!buffer_uptodate(journal->j_header_bh)) { reiserfs_warning(sb, "journal-837", "IO error during journal replay"); diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 1b27b56..da3fefe 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -340,7 +340,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file) char *p; p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file)); - if (p) + if (!IS_ERR(p)) memmove(last_sysfs_file, p, strlen(p) + 1); /* need attr_sd for attr and ops, its parent for kobj */ diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index ea79072..1846a0d 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -440,12 +440,7 @@ _xfs_buf_find( ASSERT(btp == bp->b_target); if (bp->b_file_offset == range_base && bp->b_buffer_length == range_length) { - /* - * If we look at something, bring it to the - * front of the list for next time. - */ atomic_inc(&bp->b_hold); - list_move(&bp->b_hash_list, &hash->bh_list); goto found; } } @@ -929,19 +924,7 @@ xfs_buf_iodone_work( xfs_buf_t *bp = container_of(work, xfs_buf_t, b_iodone_work); - /* - * We can get an EOPNOTSUPP to ordered writes. Here we clear the - * ordered flag and reissue them. Because we can't tell the higher - * layers directly that they should not issue ordered I/O anymore, they - * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion. 
- */ - if ((bp->b_error == EOPNOTSUPP) && - (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) { - trace_xfs_buf_ordered_retry(bp, _RET_IP_); - bp->b_flags &= ~XBF_ORDERED; - bp->b_flags |= _XFS_BARRIER_FAILED; - xfs_buf_iorequest(bp); - } else if (bp->b_iodone) + if (bp->b_iodone) (*(bp->b_iodone))(bp); else if (bp->b_flags & XBF_ASYNC) xfs_buf_relse(bp); @@ -1200,7 +1183,7 @@ _xfs_buf_ioapply( if (bp->b_flags & XBF_ORDERED) { ASSERT(!(bp->b_flags & XBF_READ)); - rw = WRITE_BARRIER; + rw = WRITE_FLUSH_FUA; } else if (bp->b_flags & XBF_LOG_BUFFER) { ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); bp->b_flags &= ~_XBF_RUN_QUEUES; @@ -1443,8 +1426,7 @@ xfs_alloc_bufhash( { unsigned int i; - btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */ - btp->bt_hashmask = (1 << btp->bt_hashshift) - 1; + btp->bt_hashshift = external ? 3 : 12; /* 8 or 4096 buckets */ btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) * sizeof(xfs_bufhash_t)); for (i = 0; i < (1 << btp->bt_hashshift); i++) { @@ -1938,7 +1920,8 @@ xfs_buf_init(void) if (!xfs_buf_zone) goto out; - xfslogd_workqueue = create_workqueue("xfslogd"); + xfslogd_workqueue = alloc_workqueue("xfslogd", + WQ_RESCUER | WQ_HIGHPRI, 1); if (!xfslogd_workqueue) goto out_free_buf_zone; diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index d072e5f..9d021c7 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -86,14 +86,6 @@ typedef enum { */ #define _XBF_PAGE_LOCKED (1 << 22) -/* - * If we try a barrier write, but it fails we have to communicate - * this to the upper layers. Unfortunately b_error gets overwritten - * when the buffer is re-issued so we have to add another flag to - * keep this information. - */ -#define _XFS_BARRIER_FAILED (1 << 23) - typedef unsigned int xfs_buf_flags_t; #define XFS_BUF_FLAGS \ @@ -114,8 +106,7 @@ typedef unsigned int xfs_buf_flags_t; { _XBF_PAGES, "PAGES" }, \ { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \ { _XBF_DELWRI_Q, "DELWRI_Q" }, \ - { _XBF_PAGE_LOCKED, "PAGE_LOCKED" }, \ - { _XFS_BARRIER_FAILED, "BARRIER_FAILED" } + { _XBF_PAGE_LOCKED, "PAGE_LOCKED" } typedef enum { @@ -137,7 +128,6 @@ typedef struct xfs_buftarg { size_t bt_smask; /* per device buffer hash table */ - uint bt_hashmask; uint bt_hashshift; xfs_bufhash_t *bt_hash; diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 237f5ff..3b9e626 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -785,6 +785,8 @@ xfs_ioc_fsgetxattr( { struct fsxattr fa; + memset(&fa, 0, sizeof(struct fsxattr)); + xfs_ilock(ip, XFS_ILOCK_SHARED); fa.fsx_xflags = xfs_ip2xflags(ip); fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; @@ -907,6 +909,13 @@ xfs_ioctl_setattr( return XFS_ERROR(EIO); /* + * Disallow 32bit project ids because on-disk structure + * is 16bit only. + */ + if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1)) + return XFS_ERROR(EINVAL); + + /* * If disk quotas is on, we make sure that the dquots do exist on disk, * before we start any other transactions. Trying to do this later * is messy. 
We don't care to take a readlock to look at the ids diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 68be25d..b1fc2a6 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -664,7 +664,7 @@ xfs_vn_fiemap( fieinfo->fi_extents_max + 1; bm.bmv_count = min_t(__s32, bm.bmv_count, (PAGE_SIZE * 16 / sizeof(struct getbmapx))); - bm.bmv_iflags = BMV_IF_PREALLOC; + bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES; if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) bm.bmv_iflags |= BMV_IF_ATTRFORK; if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC)) diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index a4e0797..08fd310 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -693,8 +693,7 @@ void xfs_blkdev_issue_flush( xfs_buftarg_t *buftarg) { - blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL, - BLKDEV_IFL_WAIT); + blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL); } STATIC void diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index be5dffd..8fe311a 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -325,7 +325,6 @@ DEFINE_BUF_EVENT(xfs_buf_lock); DEFINE_BUF_EVENT(xfs_buf_lock_done); DEFINE_BUF_EVENT(xfs_buf_cond_lock); DEFINE_BUF_EVENT(xfs_buf_unlock); -DEFINE_BUF_EVENT(xfs_buf_ordered_retry); DEFINE_BUF_EVENT(xfs_buf_iowait); DEFINE_BUF_EVENT(xfs_buf_iowait_done); DEFINE_BUF_EVENT(xfs_buf_delwri_queue); diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 23f14e5..f90dadd 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -5533,12 +5533,24 @@ xfs_getbmap( map[i].br_startblock)) goto out_free_map; - nexleft--; bmv->bmv_offset = out[cur_ext].bmv_offset + out[cur_ext].bmv_length; bmv->bmv_length = max_t(__int64_t, 0, bmvend - bmv->bmv_offset); + + /* + * In case we don't want to return the hole, + * don't increase cur_ext so that we can reuse + * it in the next loop. + */ + if ((iflags & BMV_IF_NO_HOLES) && + map[i].br_startblock == HOLESTARTBLOCK) { + memset(&out[cur_ext], 0, sizeof(out[cur_ext])); + continue; + } + + nexleft--; bmv->bmv_entries++; cur_ext++; } diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index 7cf7220..87c2e9d 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -114,8 +114,10 @@ struct getbmapx { #define BMV_IF_NO_DMAPI_READ 0x2 /* Do not generate DMAPI read event */ #define BMV_IF_PREALLOC 0x4 /* rtn status BMV_OF_PREALLOC if req */ #define BMV_IF_DELALLOC 0x8 /* rtn status BMV_OF_DELALLOC if req */ +#define BMV_IF_NO_HOLES 0x10 /* Do not return holes */ #define BMV_IF_VALID \ - (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC|BMV_IF_DELALLOC) + (BMV_IF_ATTRFORK|BMV_IF_NO_DMAPI_READ|BMV_IF_PREALLOC| \ + BMV_IF_DELALLOC|BMV_IF_NO_HOLES) /* bmv_oflags values - returned for each non-header segment */ #define BMV_OF_PREALLOC 0x1 /* segment = unwritten pre-allocation */ diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 33f718f..ba8e36e 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -917,19 +917,6 @@ xlog_iodone(xfs_buf_t *bp) l = iclog->ic_log; /* - * If the _XFS_BARRIER_FAILED flag was set by a lower - * layer, it means the underlying device no longer supports - * barrier I/O. Warn loudly and turn off barriers. - */ - if (bp->b_flags & _XFS_BARRIER_FAILED) { - bp->b_flags &= ~_XFS_BARRIER_FAILED; - l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER; - xfs_fs_cmn_err(CE_WARN, l->l_mp, - "xlog_iodone: Barriers are no longer supported" - " by device. 
Disabling barriers\n"); - } - - /* * Race to shutdown the filesystem if we see an error. */ if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), l->l_mp, diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 66d585c..4c7c7bf 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -2299,15 +2299,22 @@ xfs_alloc_file_space( e = allocatesize_fsb; } + /* + * The transaction reservation is limited to a 32-bit block + * count, hence we need to limit the number of blocks we are + * trying to reserve to avoid an overflow. We can't allocate + * more than @nimaps extents, and an extent is limited on disk + * to MAXEXTLEN (21 bits), so use that to enforce the limit. + */ + resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps)); if (unlikely(rt)) { - resrtextents = qblocks = (uint)(e - s); + resrtextents = qblocks = resblks; resrtextents /= mp->m_sb.sb_rextsize; resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); quota_flag = XFS_QMOPT_RES_RTBLKS; } else { resrtextents = 0; - resblks = qblocks = \ - XFS_DIOSTRAT_SPACE_RES(mp, (uint)(e - s)); + resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks); quota_flag = XFS_QMOPT_RES_REGBLKS; } diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index baacd98..4de84ce 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -377,9 +377,6 @@ struct acpi_pci_root { u32 osc_support_set; /* _OSC state of support bits */ u32 osc_control_set; /* _OSC state of control bits */ - u32 osc_control_qry; /* the latest _OSC query result */ - - u32 osc_queried:1; /* has _OSC control been queried? */ }; /* helper */ diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index c7376bf..8ca18e2 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -16,15 +16,27 @@ * While the GPIO programming interface defines valid GPIO numbers * to be in the range 0..MAX_INT, this library restricts them to the * smaller range 0..ARCH_NR_GPIOS-1. + * + * ARCH_NR_GPIOS is somewhat arbitrary; it usually reflects the sum of + * builtin/SoC GPIOs plus a number of GPIOs on expanders; the latter is + * actually an estimate of a board-specific value. */ #ifndef ARCH_NR_GPIOS #define ARCH_NR_GPIOS 256 #endif +/* + * "valid" GPIO numbers are nonnegative and may be passed to + * setup routines like gpio_request(). only some valid numbers + * can successfully be requested and used. + * + * Invalid GPIO numbers are useful for indicating no-such-GPIO in + * platform data and other tables. + */ + static inline int gpio_is_valid(int number) { - /* only some non-negative numbers are valid */ return ((unsigned)number) < ARCH_NR_GPIOS; } diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index b5043a9..08923b6 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -70,11 +70,16 @@ extern void setup_per_cpu_areas(void); #else /* ! 
SMP */ -#define per_cpu(var, cpu) (*((void)(cpu), &(var))) -#define __get_cpu_var(var) (var) -#define __raw_get_cpu_var(var) (var) -#define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) -#define __this_cpu_ptr(ptr) this_cpu_ptr(ptr) +#define VERIFY_PERCPU_PTR(__p) ({ \ + __verify_pcpu_ptr((__p)); \ + (typeof(*(__p)) __kernel __force *)(__p); \ +}) + +#define per_cpu(var, cpu) (*((void)(cpu), VERIFY_PERCPU_PTR(&(var)))) +#define __get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) +#define __raw_get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) +#define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) +#define __this_cpu_ptr(ptr) this_cpu_ptr(ptr) #endif /* SMP */ diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index c9f3cc5..3e5a51a 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -386,7 +386,15 @@ struct drm_connector_funcs { void (*dpms)(struct drm_connector *connector, int mode); void (*save)(struct drm_connector *connector); void (*restore)(struct drm_connector *connector); - enum drm_connector_status (*detect)(struct drm_connector *connector); + + /* Check to see if anything is attached to the connector. + * @force is set to false whilst polling, true when checking the + * connector due to user request. @force can be used by the driver + * to avoid expensive, destructive operations during automated + * probing. + */ + enum drm_connector_status (*detect)(struct drm_connector *connector, + bool force); int (*fill_modes)(struct drm_connector *connector, uint32_t max_width, uint32_t max_height); int (*set_property)(struct drm_connector *connector, struct drm_property *property, uint64_t val); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ccf94dc..c227757 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -304,8 +304,8 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); OSC_PCI_EXPRESS_PME_CONTROL | \ OSC_PCI_EXPRESS_AER_CONTROL | \ OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL) - -extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags); +extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, + u32 *mask, u32 req); extern void acpi_early_init(void); #else /* !CONFIG_ACPI */ diff --git a/include/linux/ata.h b/include/linux/ata.h index fe6e681..0c4929f 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -89,6 +89,7 @@ enum { ATA_ID_SPG = 98, ATA_ID_LBA_CAPACITY_2 = 100, ATA_ID_SECTOR_SIZE = 106, + ATA_ID_LOGICAL_SECTOR_SIZE = 117, /* and 118 */ ATA_ID_LAST_LUN = 126, ATA_ID_DLF = 128, ATA_ID_CSFO = 129, @@ -640,16 +641,49 @@ static inline int ata_id_flush_ext_enabled(const u16 *id) return (id[ATA_ID_CFS_ENABLE_2] & 0x2400) == 0x2400; } -static inline int ata_id_has_large_logical_sectors(const u16 *id) +static inline u32 ata_id_logical_sector_size(const u16 *id) { - if ((id[ATA_ID_SECTOR_SIZE] & 0xc000) != 0x4000) - return 0; - return id[ATA_ID_SECTOR_SIZE] & (1 << 13); + /* T13/1699-D Revision 6a, Sep 6, 2008. Page 128. + * IDENTIFY DEVICE data, word 117-118. + * 0xd000 ignores bit 13 (logical:physical > 1) + */ + if ((id[ATA_ID_SECTOR_SIZE] & 0xd000) == 0x5000) + return (((id[ATA_ID_LOGICAL_SECTOR_SIZE+1] << 16) + + id[ATA_ID_LOGICAL_SECTOR_SIZE]) * sizeof(u16)) ; + return ATA_SECT_SIZE; +} + +static inline u8 ata_id_log2_per_physical_sector(const u16 *id) +{ + /* T13/1699-D Revision 6a, Sep 6, 2008. Page 128. + * IDENTIFY DEVICE data, word 106. 
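+ *
+ * Worked example (editorial sketch, not part of the patch): a 512e
+ * drive with 4 KiB physical sectors reports word 106 = 0x6003; the
+ * 0x6000 bits mark the field valid with multiple logical sectors
+ * per physical sector, and the low nibble 0x3 gives 2^3 = 8
+ * logical sectors per physical sector.
+ *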
+ * 0xe000 ignores bit 12 (logical sector > 512 bytes) + */ + if ((id[ATA_ID_SECTOR_SIZE] & 0xe000) == 0x6000) + return (id[ATA_ID_SECTOR_SIZE] & 0xf); + return 0; } -static inline u16 ata_id_logical_per_physical_sectors(const u16 *id) +/* Offset of logical sectors relative to physical sectors. + * + * If device has more than one logical sector per physical sector + * (aka 512 byte emulation), vendors might offset the "sector 0" address + * so sector 63 is "naturally aligned" - e.g. FAT partition table. + * This avoids Read/Mod/Write penalties when using FAT partition table + * and updating "well aligned" (FS perspective) physical sectors on every + * transaction. + */ +static inline u16 ata_id_logical_sector_offset(const u16 *id, + u8 log2_per_phys) { - return 1 << (id[ATA_ID_SECTOR_SIZE] & 0xf); + u16 word_209 = id[209]; + + if ((log2_per_phys > 1) && (word_209 & 0xc000) == 0x4000) { + u16 first = word_209 & 0x3fff; + if (first > 0) + return (1 << log2_per_phys) - first; + } + return 0; } static inline int ata_id_has_lba48(const u16 *id) diff --git a/include/linux/bio.h b/include/linux/bio.h index 5274103..2c3fd74 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -496,6 +496,10 @@ static inline struct bio *bio_list_get(struct bio_list *bl) #define bip_for_each_vec(bvl, bip, i) \ __bip_for_each_vec(bvl, bip, i, (bip)->bip_idx) +#define bio_for_each_integrity_vec(_bvl, _bio, _iter) \ + for_each_bio(_bio) \ + bip_for_each_vec(_bvl, _bio->bi_integrity, _iter) + #define bio_integrity(bio) (bio->bi_integrity != NULL) extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index ca83a97..0f7fde2 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -130,6 +130,8 @@ enum rq_flag_bits { /* bio only flags */ __REQ_UNPLUG, /* unplug the immediately after submission */ __REQ_RAHEAD, /* read ahead, can fail anytime */ + __REQ_THROTTLED, /* This bio has already been subjected to + * throttling rules. Don't do it again. 
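+ *
+ * (Editorial note: the throttling code added later in this series
+ * is expected to set this flag before resubmitting a bio, so the
+ * bio is charged against the group limits only once.)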
*/ /* request only flags */ __REQ_SORTED, /* elevator knows about this request */ @@ -143,7 +145,6 @@ enum rq_flag_bits { __REQ_FAILED, /* set if the request failed */ __REQ_QUIET, /* don't worry about errors */ __REQ_PREEMPT, /* set for "ide_preempt" requests */ - __REQ_ORDERED_COLOR, /* is before or after barrier */ __REQ_ALLOCED, /* request came from our alloc pool */ __REQ_COPY_USER, /* contains copies of user pages */ __REQ_INTEGRITY, /* integrity metadata has been remapped */ @@ -168,10 +169,12 @@ enum rq_flag_bits { (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_COMMON_MASK \ (REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \ - REQ_META| REQ_DISCARD | REQ_NOIDLE) + REQ_META | REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) +#define REQ_CLONE_MASK REQ_COMMON_MASK #define REQ_UNPLUG (1 << __REQ_UNPLUG) #define REQ_RAHEAD (1 << __REQ_RAHEAD) +#define REQ_THROTTLED (1 << __REQ_THROTTLED) #define REQ_SORTED (1 << __REQ_SORTED) #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) @@ -184,7 +187,6 @@ enum rq_flag_bits { #define REQ_FAILED (1 << __REQ_FAILED) #define REQ_QUIET (1 << __REQ_QUIET) #define REQ_PREEMPT (1 << __REQ_PREEMPT) -#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) #define REQ_ALLOCED (1 << __REQ_ALLOCED) #define REQ_COPY_USER (1 << __REQ_COPY_USER) #define REQ_INTEGRITY (1 << __REQ_INTEGRITY) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2c54906..5262190 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -124,6 +124,9 @@ struct request { * physical address coalescing is performed. */ unsigned short nr_phys_segments; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + unsigned short nr_integrity_segments; +#endif unsigned short ioprio; @@ -243,6 +246,7 @@ struct queue_limits { unsigned short logical_block_size; unsigned short max_segments; + unsigned short max_integrity_segments; unsigned char misaligned; unsigned char discard_misaligned; @@ -355,18 +359,25 @@ struct request_queue struct blk_trace *blk_trace; #endif /* - * reserved for flush operations + * for flush operations */ - unsigned int ordered, next_ordered, ordseq; - int orderr, ordcolor; - struct request pre_flush_rq, bar_rq, post_flush_rq; - struct request *orig_bar_rq; + unsigned int flush_flags; + unsigned int flush_seq; + int flush_err; + struct request flush_rq; + struct request *orig_flush_rq; + struct list_head pending_flushes; struct mutex sysfs_lock; #if defined(CONFIG_BLK_DEV_BSG) struct bsg_class_device bsg_dev; #endif + +#ifdef CONFIG_BLK_DEV_THROTTLING + /* Throttle data */ + struct throtl_data *td; +#endif }; #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ @@ -462,56 +473,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) __clear_bit(flag, &q->queue_flags); } -enum { - /* - * Hardbarrier is supported with one of the following methods. 
- * - * NONE : hardbarrier unsupported - * DRAIN : ordering by draining is enough - * DRAIN_FLUSH : ordering by draining w/ pre and post flushes - * DRAIN_FUA : ordering by draining w/ pre flush and FUA write - * TAG : ordering by tag is enough - * TAG_FLUSH : ordering by tag w/ pre and post flushes - * TAG_FUA : ordering by tag w/ pre flush and FUA write - */ - QUEUE_ORDERED_BY_DRAIN = 0x01, - QUEUE_ORDERED_BY_TAG = 0x02, - QUEUE_ORDERED_DO_PREFLUSH = 0x10, - QUEUE_ORDERED_DO_BAR = 0x20, - QUEUE_ORDERED_DO_POSTFLUSH = 0x40, - QUEUE_ORDERED_DO_FUA = 0x80, - - QUEUE_ORDERED_NONE = 0x00, - - QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_BY_DRAIN | - QUEUE_ORDERED_DO_BAR, - QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_POSTFLUSH, - QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_FUA, - - QUEUE_ORDERED_TAG = QUEUE_ORDERED_BY_TAG | - QUEUE_ORDERED_DO_BAR, - QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_POSTFLUSH, - QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_FUA, - - /* - * Ordered operation sequence - */ - QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ - QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ - QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ - QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ - QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ - QUEUE_ORDSEQ_DONE = 0x20, -}; - #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) @@ -521,7 +482,6 @@ enum { #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) -#define blk_queue_flushing(q) ((q)->ordseq) #define blk_queue_stackable(q) \ test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) @@ -592,7 +552,8 @@ static inline void blk_clear_queue_full(struct request_queue *q, int sync) * it already be started by driver. 
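 *
 * (Editorial note on the hunk below: REQ_FLUSH and REQ_FUA join the
 * no-merge set, since merging data into a request that carries cache
 * flush semantics would change its durability and ordering
 * guarantees.)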
*/ #define RQ_NOMERGE_FLAGS \ - (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) + (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER | \ + REQ_FLUSH | REQ_FUA) #define rq_mergeable(rq) \ (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ (((rq)->cmd_flags & REQ_DISCARD) || \ @@ -881,12 +842,8 @@ extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); +extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -extern int blk_queue_ordered(struct request_queue *, unsigned); -extern bool blk_do_ordered(struct request_queue *, struct request **); -extern unsigned blk_ordered_cur_seq(struct request_queue *); -extern unsigned blk_ordered_req_seq(struct request *); -extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int); extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); extern void blk_dump_rq_flags(struct request *, char *); @@ -919,27 +876,20 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, return NULL; return bqt->tag_index[tag]; } -enum{ - BLKDEV_WAIT, /* wait for completion */ - BLKDEV_BARRIER, /* issue request with barrier */ - BLKDEV_SECURE, /* secure discard */ -}; -#define BLKDEV_IFL_WAIT (1 << BLKDEV_WAIT) -#define BLKDEV_IFL_BARRIER (1 << BLKDEV_BARRIER) -#define BLKDEV_IFL_SECURE (1 << BLKDEV_SECURE) -extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *, - unsigned long); + +#define BLKDEV_DISCARD_SECURE 0x01 /* secure discard */ + +extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); -static inline int sb_issue_discard(struct super_block *sb, - sector_t block, sector_t nr_blocks) + sector_t nr_sects, gfp_t gfp_mask); +static inline int sb_issue_discard(struct super_block *sb, sector_t block, + sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) { - block <<= (sb->s_blocksize_bits - 9); - nr_blocks <<= (sb->s_blocksize_bits - 9); - return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS, - BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); + return blkdev_issue_discard(sb->s_bdev, block << (sb->s_blocksize_bits - 9), + nr_blocks << (sb->s_blocksize_bits - 9), + gfp_mask, flags); } extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); @@ -1093,11 +1043,11 @@ static inline int queue_dma_alignment(struct request_queue *q) return q ? 
q->dma_alignment : 511; } -static inline int blk_rq_aligned(struct request_queue *q, void *addr, +static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr, unsigned int len) { unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask; - return !((unsigned long)addr & alignment) && !(len & alignment); + return !(addr & alignment) && !(len & alignment); } /* assumes size > 256 */ @@ -1127,6 +1077,7 @@ static inline void put_dev_sector(Sector p) struct work_struct; int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); +int kblockd_schedule_delayed_work(struct request_queue *q, struct delayed_work *dwork, unsigned long delay); #ifdef CONFIG_BLK_CGROUP /* @@ -1170,6 +1121,24 @@ static inline uint64_t rq_io_start_time_ns(struct request *req) } #endif +#ifdef CONFIG_BLK_DEV_THROTTLING +extern int blk_throtl_init(struct request_queue *q); +extern void blk_throtl_exit(struct request_queue *q); +extern int blk_throtl_bio(struct request_queue *q, struct bio **bio); +extern void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay); +extern void throtl_shutdown_timer_wq(struct request_queue *q); +#else /* CONFIG_BLK_DEV_THROTTLING */ +static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio) +{ + return 0; +} + +static inline int blk_throtl_init(struct request_queue *q) { return 0; } +static inline int blk_throtl_exit(struct request_queue *q) { return 0; } +static inline void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay) {} +static inline void throtl_shutdown_timer_wq(struct request_queue *q) {} +#endif /* CONFIG_BLK_DEV_THROTTLING */ + #define MODULE_ALIAS_BLOCKDEV(major,minor) \ MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ @@ -1213,8 +1182,13 @@ struct blk_integrity { extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); extern void blk_integrity_unregister(struct gendisk *); extern int blk_integrity_compare(struct gendisk *, struct gendisk *); -extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); -extern int blk_rq_count_integrity_sg(struct request *); +extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, + struct scatterlist *); +extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); +extern int blk_integrity_merge_rq(struct request_queue *, struct request *, + struct request *); +extern int blk_integrity_merge_bio(struct request_queue *, struct request *, + struct bio *); static inline struct blk_integrity *bdev_get_integrity(struct block_device *bdev) @@ -1235,16 +1209,32 @@ static inline int blk_integrity_rq(struct request *rq) return bio_integrity(rq->bio); } +static inline void blk_queue_max_integrity_segments(struct request_queue *q, + unsigned int segs) +{ + q->limits.max_integrity_segments = segs; +} + +static inline unsigned short +queue_max_integrity_segments(struct request_queue *q) +{ + return q->limits.max_integrity_segments; +} + #else /* CONFIG_BLK_DEV_INTEGRITY */ #define blk_integrity_rq(rq) (0) -#define blk_rq_count_integrity_sg(a) (0) -#define blk_rq_map_integrity_sg(a, b) (0) +#define blk_rq_count_integrity_sg(a, b) (0) +#define blk_rq_map_integrity_sg(a, b, c) (0) #define bdev_get_integrity(a) (0) #define blk_get_integrity(a) (0) #define blk_integrity_compare(a, b) (0) #define blk_integrity_register(a, b) (0) #define blk_integrity_unregister(a) do { } while (0); +#define 
blk_queue_max_integrity_segments(a, b) do { } while (0); +#define queue_max_integrity_segments(a) (0) +#define blk_integrity_merge_rq(a, b, c) (0) +#define blk_integrity_merge_bio(a, b, c) (0) #endif /* CONFIG_BLK_DEV_INTEGRITY */ diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index ec94c12..dd1b25b 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -32,7 +32,6 @@ enum bh_state_bits { BH_Delay, /* Buffer is not yet allocated on disk */ BH_Boundary, /* Block is followed by a discontiguity */ BH_Write_EIO, /* I/O error on write */ - BH_Eopnotsupp, /* operation not supported (barrier) */ BH_Unwritten, /* Buffer is allocated on disk but not written */ BH_Quiet, /* Buffer Error Prinks to be quiet */ @@ -124,7 +123,6 @@ BUFFER_FNS(Async_Write, async_write) BUFFER_FNS(Delay, delay) BUFFER_FNS(Boundary, boundary) BUFFER_FNS(Write_EIO, write_io_error) -BUFFER_FNS(Eopnotsupp, eopnotsupp) BUFFER_FNS(Unwritten, unwritten) #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ed3e92e..0c99102 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -578,7 +578,12 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it); int cgroup_scan_tasks(struct cgroup_scanner *scan); int cgroup_attach_task(struct cgroup *, struct task_struct *); -int cgroup_attach_task_current_cg(struct task_struct *); +int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); + +static inline int cgroup_attach_task_current_cg(struct task_struct *tsk) +{ + return cgroup_attach_task_all(current, tsk); +} /* * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works @@ -636,6 +641,11 @@ static inline int cgroupstats_build(struct cgroupstats *stats, } /* No cgroups - nothing to do */ +static inline int cgroup_attach_task_all(struct task_struct *from, + struct task_struct *t) +{ + return 0; +} static inline int cgroup_attach_task_current_cg(struct task_struct *t) { return 0; diff --git a/include/linux/compat.h b/include/linux/compat.h index 9ddc878..5778b55 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -360,5 +360,8 @@ extern ssize_t compat_rw_copy_check_uvector(int type, const struct compat_iovec __user *uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, struct iovec **ret_pointer); + +extern void __user *compat_alloc_user_space(unsigned long len); + #endif /* CONFIG_COMPAT */ #endif /* _LINUX_COMPAT_H */ diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 2c958f4..926b503 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -136,6 +136,7 @@ extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t); extern int elevator_init(struct request_queue *, char *); extern void elevator_exit(struct elevator_queue *); +extern int elevator_change(struct request_queue *, const char *); extern int elv_rq_merge_ok(struct request *, struct bio *); /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 76041b6..6b0f6e9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -135,12 +135,12 @@ struct inodes_stat_t { * immediately after submission. The write equivalent * of READ_SYNC. * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. 
- * WRITE_BARRIER Like WRITE_SYNC, but tells the block layer that all - * previously submitted writes must be safely on storage - * before this one is started. Also guarantees that when - * this write is complete, it itself is also safely on - * storage. Prevents reordering of writes on both sides - * of this IO. + * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. + * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on + * non-volatile media on completion. + * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded + * by a cache flush and data is guaranteed to be on + * non-volatile media on completion. * */ #define RW_MASK REQ_WRITE @@ -156,16 +156,12 @@ struct inodes_stat_t { #define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG) #define WRITE_ODIRECT_PLUG (WRITE | REQ_SYNC) #define WRITE_META (WRITE | REQ_META) -#define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ - REQ_HARDBARRIER) - -/* - * These aren't really reads or writes, they pass down information about - * parts of device that are now unused by the file system. - */ -#define DISCARD_NOBARRIER (WRITE | REQ_DISCARD) -#define DISCARD_BARRIER (WRITE | REQ_DISCARD | REQ_HARDBARRIER) -#define DISCARD_SECURE (DISCARD_NOBARRIER | REQ_SECURE) +#define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_FLUSH) +#define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_FUA) +#define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_FLUSH | REQ_FUA) #define SEL_IN 1 #define SEL_OUT 2 diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 5f2f4c4..66e26b5 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -12,6 +12,7 @@ #include #include #include +#include #ifdef CONFIG_BLOCK @@ -86,7 +87,15 @@ struct disk_stats { unsigned long io_ticks; unsigned long time_in_queue; }; - + +#define PARTITION_META_INFO_VOLNAMELTH 64 +#define PARTITION_META_INFO_UUIDLTH 16 + +struct partition_meta_info { + u8 uuid[PARTITION_META_INFO_UUIDLTH]; /* always big endian */ + u8 volname[PARTITION_META_INFO_VOLNAMELTH]; +}; + struct hd_struct { sector_t start_sect; sector_t nr_sects; @@ -95,6 +104,7 @@ struct hd_struct { struct device __dev; struct kobject *holder_dir; int policy, partno; + struct partition_meta_info *info; #ifdef CONFIG_FAIL_MAKE_REQUEST int make_it_fail; #endif @@ -181,6 +191,30 @@ static inline struct gendisk *part_to_disk(struct hd_struct *part) return NULL; } +static inline void part_pack_uuid(const u8 *uuid_str, u8 *to) +{ + int i; + for (i = 0; i < 16; ++i) { + *to++ = (hex_to_bin(*uuid_str) << 4) | + (hex_to_bin(*(uuid_str + 1))); + uuid_str += 2; + switch (i) { + case 3: + case 5: + case 7: + case 9: + uuid_str++; + continue; + } + } +} + +static inline char *part_unpack_uuid(const u8 *uuid, char *out) +{ + sprintf(out, "%pU", uuid); + return out; +} + static inline int disk_max_parts(struct gendisk *disk) { if (disk->flags & GENHD_FL_EXT_DEVT) @@ -342,6 +376,19 @@ static inline int part_in_flight(struct hd_struct *part) return part->in_flight[0] + part->in_flight[1]; } +static inline struct partition_meta_info *alloc_part_info(struct gendisk *disk) +{ + if (disk) + return kzalloc_node(sizeof(struct partition_meta_info), + GFP_KERNEL, disk->node_id); + return kzalloc(sizeof(struct partition_meta_info), GFP_KERNEL); +} + +static inline void free_part_info(struct hd_struct *part) +{ + kfree(part->info); +} + /* block/blk-core.c */ extern void part_round_stats(int cpu, struct hd_struct *part); @@ -533,7 +580,9 @@ 
extern int disk_expand_part_tbl(struct gendisk *disk, int target); extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); extern struct hd_struct * __must_check add_partition(struct gendisk *disk, int partno, sector_t start, - sector_t len, int flags); + sector_t len, int flags, + struct partition_meta_info + *info); extern void delete_partition(struct gendisk *, int); extern void printk_all_partitions(void); diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 03f616b..e41f7dd 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -13,6 +13,7 @@ #include struct device; +struct gpio_chip; /* * Some platforms don't support the GPIO programming interface. diff --git a/include/linux/i2c/sx150x.h b/include/linux/i2c/sx150x.h index ee3049c..52baa79 100644 --- a/include/linux/i2c/sx150x.h +++ b/include/linux/i2c/sx150x.h @@ -63,6 +63,9 @@ * IRQ lines will appear. Similarly to gpio_base, the expander * will create a block of irqs beginning at this number. * This value is ignored if irq_summary is < 0. + * @reset_during_probe: If set to true, the driver will trigger a full + * reset of the chip at the beginning of the probe + * in order to place it in a known state. */ struct sx150x_platform_data { unsigned gpio_base; @@ -73,6 +76,7 @@ struct sx150x_platform_data { u16 io_polarity; int irq_summary; unsigned irq_base; + bool reset_during_probe; }; #endif /* __LINUX_I2C_SX150X_H */ diff --git a/include/linux/intel-gtt.h b/include/linux/intel-gtt.h new file mode 100644 index 0000000..1d19ab2 --- /dev/null +++ b/include/linux/intel-gtt.h @@ -0,0 +1,20 @@ +/* + * Common Intel AGPGART and GTT definitions. + */ +#ifndef _INTEL_GTT_H +#define _INTEL_GTT_H + +#include + +/* This is for Intel only GTT controls. + * + * Sandybridge: AGP_USER_CACHED_MEMORY default to LLC only + */ + +#define AGP_USER_CACHED_MEMORY_LLC_MLC (AGP_USER_TYPES + 2) +#define AGP_USER_UNCACHED_MEMORY (AGP_USER_TYPES + 4) + +/* flag for GFDT type */ +#define AGP_USER_CACHED_MEMORY_GFDT (1 << 3) + +#endif diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 0a6b3d5..7fb5927 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -79,7 +79,7 @@ io_mapping_free(struct io_mapping *mapping) } /* Atomic map/unmap */ -static inline void * +static inline void __iomem * io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset, int slot) @@ -94,12 +94,12 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping, } static inline void -io_mapping_unmap_atomic(void *vaddr, int slot) +io_mapping_unmap_atomic(void __iomem *vaddr, int slot) { iounmap_atomic(vaddr, slot); } -static inline void * +static inline void __iomem * io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) { resource_size_t phys_addr; @@ -111,7 +111,7 @@ io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) } static inline void -io_mapping_unmap(void *vaddr) +io_mapping_unmap(void __iomem *vaddr) { iounmap(vaddr); } @@ -125,38 +125,38 @@ struct io_mapping; static inline struct io_mapping * io_mapping_create_wc(resource_size_t base, unsigned long size) { - return (struct io_mapping *) ioremap_wc(base, size); + return (struct io_mapping __force *) ioremap_wc(base, size); } static inline void io_mapping_free(struct io_mapping *mapping) { - iounmap(mapping); + iounmap((void __force __iomem *) mapping); } /* Atomic map/unmap */ -static inline void * +static inline void __iomem * io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset, int 
slot) { - return ((char *) mapping) + offset; + return ((char __force __iomem *) mapping) + offset; } static inline void -io_mapping_unmap_atomic(void *vaddr, int slot) +io_mapping_unmap_atomic(void __iomem *vaddr, int slot) { } /* Non-atomic map/unmap */ -static inline void * +static inline void __iomem * io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) { - return ((char *) mapping) + offset; + return ((char __force __iomem *) mapping) + offset; } static inline void -io_mapping_unmap(void *vaddr) +io_mapping_unmap(void __iomem *vaddr) { } diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2b0a35e..f5df2f4 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -641,6 +641,16 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } _max1 > _max2 ? _max1 : _max2; }) /** + * min_not_zero - return the minimum that is _not_ zero, unless both are zero + * @x: value1 + * @y: value2 + */ +#define min_not_zero(x, y) ({ \ + typeof(x) __x = (x); \ + typeof(y) __y = (y); \ + __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) + +/** * clamp - return a value clamped to a given range with strict typechecking * @val: current value * @min: minimum allowable value diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 4aa95f2..62dbee5 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -214,7 +214,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_reset(fifo) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ __tmp->kfifo.in = __tmp->kfifo.out = 0; \ }) @@ -228,7 +228,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_reset_out(fifo) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ __tmp->kfifo.out = __tmp->kfifo.in; \ }) @@ -238,7 +238,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_len(fifo) \ ({ \ - typeof(fifo + 1) __tmpl = (fifo); \ + typeof((fifo) + 1) __tmpl = (fifo); \ __tmpl->kfifo.in - __tmpl->kfifo.out; \ }) @@ -248,7 +248,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_is_empty(fifo) \ ({ \ - typeof(fifo + 1) __tmpq = (fifo); \ + typeof((fifo) + 1) __tmpq = (fifo); \ __tmpq->kfifo.in == __tmpq->kfifo.out; \ }) @@ -258,7 +258,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_is_full(fifo) \ ({ \ - typeof(fifo + 1) __tmpq = (fifo); \ + typeof((fifo) + 1) __tmpq = (fifo); \ kfifo_len(__tmpq) > __tmpq->kfifo.mask; \ }) @@ -269,7 +269,7 @@ __kfifo_must_check_helper(unsigned int val) #define kfifo_avail(fifo) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmpq = (fifo); \ + typeof((fifo) + 1) __tmpq = (fifo); \ const size_t __recsize = sizeof(*__tmpq->rectype); \ unsigned int __avail = kfifo_size(__tmpq) - kfifo_len(__tmpq); \ (__recsize) ? ((__avail <= __recsize) ? 0 : \ @@ -284,7 +284,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_skip(fifo) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ if (__recsize) \ @@ -302,7 +302,7 @@ __kfifo_must_check_helper( \ #define kfifo_peek_len(fifo) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ (!__recsize) ? 
kfifo_len(__tmp) * sizeof(*__tmp->type) : \ @@ -325,7 +325,7 @@ __kfifo_must_check_helper( \ #define kfifo_alloc(fifo, size, gfp_mask) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ __is_kfifo_ptr(__tmp) ? \ __kfifo_alloc(__kfifo, size, sizeof(*__tmp->type), gfp_mask) : \ @@ -339,7 +339,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_free(fifo) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ if (__is_kfifo_ptr(__tmp)) \ __kfifo_free(__kfifo); \ @@ -358,7 +358,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_init(fifo, buffer, size) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ __is_kfifo_ptr(__tmp) ? \ __kfifo_init(__kfifo, buffer, size, sizeof(*__tmp->type)) : \ @@ -379,8 +379,8 @@ __kfifo_must_check_helper( \ */ #define kfifo_put(fifo, val) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(val + 1) __val = (val); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((val) + 1) __val = (val); \ unsigned int __ret; \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -421,8 +421,8 @@ __kfifo_must_check_helper( \ #define kfifo_get(fifo, val) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(val + 1) __val = (val); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((val) + 1) __val = (val); \ unsigned int __ret; \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -462,8 +462,8 @@ __kfifo_must_check_helper( \ #define kfifo_peek(fifo, val) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(val + 1) __val = (val); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((val) + 1) __val = (val); \ unsigned int __ret; \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -501,8 +501,8 @@ __kfifo_must_check_helper( \ */ #define kfifo_in(fifo, buf, n) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(buf + 1) __buf = (buf); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((buf) + 1) __buf = (buf); \ unsigned long __n = (n); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -554,8 +554,8 @@ __kfifo_must_check_helper( \ #define kfifo_out(fifo, buf, n) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(buf + 1) __buf = (buf); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((buf) + 1) __buf = (buf); \ unsigned long __n = (n); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -611,7 +611,7 @@ __kfifo_must_check_helper( \ #define kfifo_from_user(fifo, from, len, copied) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ const void __user *__from = (from); \ unsigned int __len = (len); \ unsigned int *__copied = (copied); \ @@ -639,7 +639,7 @@ __kfifo_must_check_helper( \ #define kfifo_to_user(fifo, to, len, copied) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ void __user *__to = (to); \ unsigned int __len = (len); \ unsigned int *__copied = (copied); \ @@ -666,7 +666,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_dma_in_prepare(fifo, sgl, nents, len) \ ({ \ - typeof(fifo + 1) __tmp 
= (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct scatterlist *__sgl = (sgl); \ int __nents = (nents); \ unsigned int __len = (len); \ @@ -690,7 +690,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_dma_in_finish(fifo, len) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ unsigned int __len = (len); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -717,7 +717,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_dma_out_prepare(fifo, sgl, nents, len) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct scatterlist *__sgl = (sgl); \ int __nents = (nents); \ unsigned int __len = (len); \ @@ -741,7 +741,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_dma_out_finish(fifo, len) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ unsigned int __len = (len); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -766,8 +766,8 @@ __kfifo_must_check_helper( \ #define kfifo_out_peek(fifo, buf, n) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(buf + 1) __buf = (buf); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((buf) + 1) __buf = (buf); \ unsigned long __n = (n); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 74d691e..3319a69 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -16,6 +16,9 @@ struct stable_node; struct mem_cgroup; +struct page *ksm_does_need_to_copy(struct page *page, + struct vm_area_struct *vma, unsigned long address); + #ifdef CONFIG_KSM int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags); @@ -70,19 +73,14 @@ static inline void set_page_stable_node(struct page *page, * We'd like to make this conditional on vma->vm_flags & VM_MERGEABLE, * but what if the vma was unmerged while the page was swapped out? 
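 *
 * Expected caller pattern after this change (editorial sketch; the
 * actual swap-in call site is not shown in these hunks):
 *
 *	if (ksm_might_need_to_copy(page, vma, address))
 *		page = ksm_does_need_to_copy(page, vma, address);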
*/ -struct page *ksm_does_need_to_copy(struct page *page, - struct vm_area_struct *vma, unsigned long address); -static inline struct page *ksm_might_need_to_copy(struct page *page, +static inline int ksm_might_need_to_copy(struct page *page, struct vm_area_struct *vma, unsigned long address) { struct anon_vma *anon_vma = page_anon_vma(page); - if (!anon_vma || - (anon_vma->root == vma->anon_vma->root && - page->index == linear_page_index(vma, address))) - return page; - - return ksm_does_need_to_copy(page, vma, address); + return anon_vma && + (anon_vma->root != vma->anon_vma->root || + page->index != linear_page_index(vma, address)); } int page_referenced_ksm(struct page *page, @@ -115,10 +113,10 @@ static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start, return 0; } -static inline struct page *ksm_might_need_to_copy(struct page *page, +static inline int ksm_might_need_to_copy(struct page *page, struct vm_area_struct *vma, unsigned long address) { - return page; + return 0; } static inline int page_referenced_ksm(struct page *page, diff --git a/include/linux/lglock.h b/include/linux/lglock.h index b288cb7..f549056 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -150,7 +150,7 @@ int i; \ preempt_disable(); \ rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_online_cpu(i) { \ + for_each_possible_cpu(i) { \ arch_spinlock_t *lock; \ lock = &per_cpu(name##_lock, i); \ arch_spin_lock(lock); \ @@ -161,7 +161,7 @@ void name##_global_unlock(void) { \ int i; \ rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ - for_each_online_cpu(i) { \ + for_each_possible_cpu(i) { \ arch_spinlock_t *lock; \ lock = &per_cpu(name##_lock, i); \ arch_spin_unlock(lock); \ diff --git a/include/linux/libata.h b/include/linux/libata.h index f010f18..15efec0 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -37,6 +37,7 @@ #include #include #include +#include /* * Define if arch has non-standard setup. 
This is a _PCI_ standard @@ -172,6 +173,7 @@ enum { ATA_LFLAG_NO_RETRY = (1 << 5), /* don't retry this link */ ATA_LFLAG_DISABLED = (1 << 6), /* link is disabled */ ATA_LFLAG_SW_ACTIVITY = (1 << 7), /* keep activity stats */ + ATA_LFLAG_NO_LPM = (1 << 8), /* disable LPM on this link */ /* struct ata_port flags */ ATA_FLAG_SLAVE_POSS = (1 << 0), /* host supports slave dev */ @@ -196,7 +198,7 @@ enum { ATA_FLAG_ACPI_SATA = (1 << 17), /* need native SATA ACPI layout */ ATA_FLAG_AN = (1 << 18), /* controller supports AN */ ATA_FLAG_PMP = (1 << 19), /* controller supports PMP */ - ATA_FLAG_IPM = (1 << 20), /* driver can handle IPM */ + ATA_FLAG_LPM = (1 << 20), /* driver can handle LPM */ ATA_FLAG_EM = (1 << 21), /* driver supports enclosure * management */ ATA_FLAG_SW_ACTIVITY = (1 << 22), /* driver supports sw activity @@ -324,17 +326,17 @@ enum { ATA_EH_HARDRESET = (1 << 2), /* meaningful only in ->prereset */ ATA_EH_RESET = ATA_EH_SOFTRESET | ATA_EH_HARDRESET, ATA_EH_ENABLE_LINK = (1 << 3), - ATA_EH_LPM = (1 << 4), /* link power management action */ ATA_EH_PARK = (1 << 5), /* unload heads and stop I/O */ ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE | ATA_EH_PARK, ATA_EH_ALL_ACTIONS = ATA_EH_REVALIDATE | ATA_EH_RESET | - ATA_EH_ENABLE_LINK | ATA_EH_LPM, + ATA_EH_ENABLE_LINK, /* ata_eh_info->flags */ ATA_EHI_HOTPLUGGED = (1 << 0), /* could have been hotplugged */ ATA_EHI_NO_AUTOPSY = (1 << 2), /* no autopsy */ ATA_EHI_QUIET = (1 << 3), /* be quiet */ + ATA_EHI_NO_RECOVERY = (1 << 4), /* no recovery */ ATA_EHI_DID_SOFTRESET = (1 << 16), /* already soft-reset this port */ ATA_EHI_DID_HARDRESET = (1 << 17), /* already soft-reset this port */ @@ -376,7 +378,6 @@ enum { ATA_HORKAGE_BROKEN_HPA = (1 << 4), /* Broken HPA */ ATA_HORKAGE_DISABLE = (1 << 5), /* Disable it */ ATA_HORKAGE_HPA_SIZE = (1 << 6), /* native size off by one */ - ATA_HORKAGE_IPM = (1 << 7), /* Link PM problems */ ATA_HORKAGE_IVB = (1 << 8), /* cbl det validity bit bugs */ ATA_HORKAGE_STUCK_ERR = (1 << 9), /* stuck ERR on next PACKET */ ATA_HORKAGE_BRIDGE_OK = (1 << 10), /* no bridge limits */ @@ -463,6 +464,22 @@ enum ata_completion_errors { AC_ERR_NCQ = (1 << 10), /* marker for offending NCQ qc */ }; +/* + * Link power management policy: If you alter this, you also need to + * alter libata-scsi.c (for the ascii descriptions) + */ +enum ata_lpm_policy { + ATA_LPM_UNKNOWN, + ATA_LPM_MAX_POWER, + ATA_LPM_MED_POWER, + ATA_LPM_MIN_POWER, +}; + +enum ata_lpm_hints { + ATA_LPM_EMPTY = (1 << 0), /* port empty/probing */ + ATA_LPM_HIPM = (1 << 1), /* may use HIPM */ +}; + /* forward declarations */ struct scsi_device; struct ata_port_operations; @@ -477,16 +494,6 @@ typedef int (*ata_reset_fn_t)(struct ata_link *link, unsigned int *classes, unsigned long deadline); typedef void (*ata_postreset_fn_t)(struct ata_link *link, unsigned int *classes); -/* - * host pm policy: If you alter this, you also need to alter libata-scsi.c - * (for the ascii descriptions) - */ -enum link_pm { - NOT_AVAILABLE, - MIN_POWER, - MAX_PERFORMANCE, - MEDIUM_POWER, -}; extern struct device_attribute dev_attr_link_power_management_policy; extern struct device_attribute dev_attr_unload_heads; extern struct device_attribute dev_attr_em_message_type; @@ -529,6 +536,10 @@ struct ata_host { void *private_data; struct ata_port_operations *ops; unsigned long flags; + + struct mutex eh_mutex; + struct task_struct *eh_owner; + #ifdef CONFIG_ATA_ACPI acpi_handle acpi_handle; #endif @@ -559,13 +570,13 @@ struct ata_queued_cmd { unsigned int extrabytes; unsigned int curbytes; - 
struct scatterlist *cursg; - unsigned int cursg_ofs; - struct scatterlist sgent; struct scatterlist *sg; + struct scatterlist *cursg; + unsigned int cursg_ofs; + unsigned int err_mask; struct ata_taskfile result_tf; ata_qc_cb_t complete_fn; @@ -603,6 +614,7 @@ struct ata_device { union acpi_object *gtf_cache; unsigned int gtf_filter; #endif + struct device tdev; /* n_sector is CLEAR_BEGIN, read comment above CLEAR_BEGIN */ u64 n_sectors; /* size of device, if ATA */ u64 n_native_sectors; /* native size, if ATA */ @@ -689,6 +701,7 @@ struct ata_link { struct ata_port *ap; int pmp; /* port multiplier port # */ + struct device tdev; unsigned int active_tag; /* active tag on this link */ u32 sactive; /* active NCQ commands */ @@ -698,6 +711,7 @@ struct ata_link { unsigned int hw_sata_spd_limit; unsigned int sata_spd_limit; unsigned int sata_spd; /* current SATA PHY speed */ + enum ata_lpm_policy lpm_policy; /* record runtime error info, protected by host_set lock */ struct ata_eh_info eh_info; @@ -706,6 +720,8 @@ struct ata_link { struct ata_device device[ATA_MAX_DEVICES]; }; +#define ATA_LINK_CLEAR_BEGIN offsetof(struct ata_link, active_tag) +#define ATA_LINK_CLEAR_END offsetof(struct ata_link, device[0]) struct ata_port { struct Scsi_Host *scsi_host; /* our co-allocated scsi host */ @@ -723,6 +739,7 @@ struct ata_port { struct ata_ioports ioaddr; /* ATA cmd/ctl/dma register blocks */ u8 ctl; /* cache of ATA control register */ u8 last_ctl; /* Cache last written value */ + struct ata_link* sff_pio_task_link; /* link currently used */ struct delayed_work sff_pio_task; #ifdef CONFIG_ATA_BMDMA struct ata_bmdma_prd *bmdma_prd; /* BMDMA SG list */ @@ -750,6 +767,7 @@ struct ata_port { struct ata_port_stats stats; struct ata_host *host; struct device *dev; + struct device tdev; struct mutex scsi_scan_mutex; struct delayed_work hotplug_task; @@ -765,7 +783,7 @@ struct ata_port { pm_message_t pm_mesg; int *pm_result; - enum link_pm pm_policy; + enum ata_lpm_policy target_lpm_policy; struct timer_list fastdrain_timer; unsigned long fastdrain_cnt; @@ -831,8 +849,8 @@ struct ata_port_operations { int (*scr_write)(struct ata_link *link, unsigned int sc_reg, u32 val); void (*pmp_attach)(struct ata_port *ap); void (*pmp_detach)(struct ata_port *ap); - int (*enable_pm)(struct ata_port *ap, enum link_pm policy); - void (*disable_pm)(struct ata_port *ap); + int (*set_lpm)(struct ata_link *link, enum ata_lpm_policy policy, + unsigned hints); /* * Start, stop, suspend and resume @@ -944,6 +962,8 @@ extern int sata_link_debounce(struct ata_link *link, const unsigned long *params, unsigned long deadline); extern int sata_link_resume(struct ata_link *link, const unsigned long *params, unsigned long deadline); +extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, + bool spm_wakeup); extern int sata_link_hardreset(struct ata_link *link, const unsigned long *timing, unsigned long deadline, bool *online, int (*check_ready)(struct ata_link *)); @@ -989,8 +1009,9 @@ extern int ata_host_suspend(struct ata_host *host, pm_message_t mesg); extern void ata_host_resume(struct ata_host *host); #endif extern int ata_ratelimit(void); -extern u32 ata_wait_register(void __iomem *reg, u32 mask, u32 val, - unsigned long interval, unsigned long timeout); +extern void ata_msleep(struct ata_port *ap, unsigned int msecs); +extern u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask, + u32 val, unsigned long interval, unsigned long timeout); extern int atapi_cmd_type(u8 opcode); extern void 
ata_tf_to_fis(const struct ata_taskfile *tf, u8 pmp, int is_cmd, u8 *fis); @@ -1594,7 +1615,7 @@ extern void ata_sff_irq_on(struct ata_port *ap); extern void ata_sff_irq_clear(struct ata_port *ap); extern int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc, u8 status, int in_wq); -extern void ata_sff_queue_pio_task(struct ata_port *ap, unsigned long delay); +extern void ata_sff_queue_pio_task(struct ata_link *link, unsigned long delay); extern unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc); extern bool ata_sff_qc_fill_rtf(struct ata_queued_cmd *qc); extern unsigned int ata_sff_port_intr(struct ata_port *ap, diff --git a/include/linux/mm.h b/include/linux/mm.h index e6b1210..74949fb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -864,6 +864,12 @@ int set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); int clear_page_dirty_for_io(struct page *page); +/* Is the vma a continuation of the stack vma above it? */ +static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr) +{ + return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN); +} + extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len); diff --git a/include/linux/mmc/sdio.h b/include/linux/mmc/sdio.h index 329a8fa..245cdac 100644 --- a/include/linux/mmc/sdio.h +++ b/include/linux/mmc/sdio.h @@ -38,6 +38,8 @@ * [8:0] Byte/block count */ +#define R4_MEMORY_PRESENT (1 << 27) + /* SDIO status in R5 Type diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6e6e626..3984c4e 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -284,6 +284,13 @@ struct zone { unsigned long watermark[NR_WMARK]; /* + * When free pages are below this point, additional steps are taken + * when reading the number of free pages to avoid per-cpu counter + * drift allowing watermarks to be breached + */ + unsigned long percpu_drift_mark; + + /* * We don't know if the memory that we're going to allocate will be freeable * or/and it will be released eventually, so to avoid totally wasting several * GB of ram we must reserve some of the lower zone memory (otherwise we risk @@ -441,6 +448,12 @@ static inline int zone_is_oom_locked(const struct zone *zone) return test_bit(ZONE_OOM_LOCKED, &zone->flags); } +#ifdef CONFIG_SMP +unsigned long zone_nr_free_pages(struct zone *zone); +#else +#define zone_nr_free_pages(zone) zone_page_state(zone, NR_FREE_PAGES) +#endif /* CONFIG_SMP */ + /* * The "priority" of VM scanning is how much of the queues we will scan in one * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 878cab4..f363bc8 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -78,6 +78,14 @@ struct mutex_waiter { # include #else # define __DEBUG_MUTEX_INITIALIZER(lockname) +/** + * mutex_init - initialize the mutex + * @mutex: the mutex to be initialized + * + * Initialize the mutex to unlocked state. + * + * It is not allowed to initialize an already locked mutex. 
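+ *
+ * Minimal usage sketch (editorial addition, not part of the patch;
+ * io_lock is a hypothetical identifier):
+ *
+ *	struct mutex io_lock;
+ *
+ *	mutex_init(&io_lock);
+ *	mutex_lock(&io_lock);
+ *	... critical section ...
+ *	mutex_unlock(&io_lock);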
+ */ # define mutex_init(mutex) \ do { \ static struct lock_class_key __key; \ diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 791d510..50d8009 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -63,20 +63,20 @@ static inline bool netpoll_rx(struct sk_buff *skb) unsigned long flags; bool ret = false; - rcu_read_lock_bh(); + local_irq_save(flags); npinfo = rcu_dereference_bh(skb->dev->npinfo); if (!npinfo || (list_empty(&npinfo->rx_np) && !npinfo->rx_flags)) goto out; - spin_lock_irqsave(&npinfo->rx_lock, flags); + spin_lock(&npinfo->rx_lock); /* check rx_flags again with the lock held */ if (npinfo->rx_flags && __netpoll_rx(skb)) ret = true; - spin_unlock_irqrestore(&npinfo->rx_lock, flags); + spin_unlock(&npinfo->rx_lock); out: - rcu_read_unlock_bh(); + local_irq_restore(flags); return ret; } diff --git a/include/linux/pci.h b/include/linux/pci.h index b1d1795..c8d95e3 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1214,6 +1214,9 @@ static inline struct pci_dev *pci_get_bus_and_slot(unsigned int bus, unsigned int devfn) { return NULL; } +static inline int pci_domain_nr(struct pci_bus *bus) +{ return 0; } + #define dev_is_pci(d) (false) #define dev_is_pf(d) (false) #define dev_num_vf(d) (0) diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f6a3b2d..10d3330 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2300,6 +2300,8 @@ #define PCI_DEVICE_ID_P2010 0x0079 #define PCI_DEVICE_ID_P1020E 0x0100 #define PCI_DEVICE_ID_P1020 0x0101 +#define PCI_DEVICE_ID_P1021E 0x0102 +#define PCI_DEVICE_ID_P1021 0x0103 #define PCI_DEVICE_ID_P1011E 0x0108 #define PCI_DEVICE_ID_P1011 0x0109 #define PCI_DEVICE_ID_P1022E 0x0110 diff --git a/include/linux/percpu.h b/include/linux/percpu.h index b8b9084..49466b1 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -149,7 +149,7 @@ extern void __init percpu_init_late(void); #else /* CONFIG_SMP */ -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) /* can't distinguish from other static vars, always false */ static inline bool is_kernel_percpu_address(unsigned long addr) diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index d50ba85..d1a9193 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -274,8 +274,14 @@ static inline int dquot_alloc_space(struct inode *inode, qsize_t nr) int ret; ret = dquot_alloc_space_nodirty(inode, nr); - if (!ret) - mark_inode_dirty_sync(inode); + if (!ret) { + /* + * Mark inode fully dirty. Since we are allocating blocks, inode + * would become fully dirty soon anyway and it reportedly + * reduces inode_lock contention. 
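+ *
+ * (Editorial note, based on the fs.h helpers of the time:
+ * mark_inode_dirty() calls __mark_inode_dirty(inode, I_DIRTY) while
+ * mark_inode_dirty_sync() passes only I_DIRTY_SYNC, so this hunk
+ * widens the dirty state rather than adding an extra sync.)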
+ */ + mark_inode_dirty(inode); + } return ret; } diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h index 7415839..5310d27 100644 --- a/include/linux/semaphore.h +++ b/include/linux/semaphore.h @@ -26,6 +26,9 @@ struct semaphore { .wait_list = LIST_HEAD_INIT((name).wait_list), \ } +#define DEFINE_SEMAPHORE(name) \ + struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1) + #define DECLARE_MUTEX(name) \ struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1) diff --git a/include/linux/serial.h b/include/linux/serial.h index 1ebc694..ef91406 100644 --- a/include/linux/serial.h +++ b/include/linux/serial.h @@ -77,8 +77,7 @@ struct serial_struct { #define PORT_16654 11 #define PORT_16850 12 #define PORT_RSA 13 /* RSA-DV II/S card */ -#define PORT_U6_16550A 14 -#define PORT_MAX 14 +#define PORT_MAX 13 #define SERIAL_IO_PORT 0 #define SERIAL_IO_HUB6 1 diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 64458a9..563e234 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -44,7 +44,8 @@ #define PORT_RM9000 16 /* PMC-Sierra RM9xxx internal UART */ #define PORT_OCTEON 17 /* Cavium OCTEON internal UART */ #define PORT_AR7 18 /* Texas Instruments AR7 internal UART */ -#define PORT_MAX_8250 18 /* max port ID */ +#define PORT_U6_16550A 19 /* ST-Ericsson U6xxx internal UART */ +#define PORT_MAX_8250 19 /* max port ID */ /* * ARM specific type numbers. These are not currently guaranteed diff --git a/include/linux/spi/dw_spi.h b/include/linux/spi/dw_spi.h index cc813f9..c91302f 100644 --- a/include/linux/spi/dw_spi.h +++ b/include/linux/spi/dw_spi.h @@ -14,7 +14,9 @@ #define SPI_MODE_OFFSET 6 #define SPI_SCPH_OFFSET 6 #define SPI_SCOL_OFFSET 7 + #define SPI_TMOD_OFFSET 8 +#define SPI_TMOD_MASK (0x3 << SPI_TMOD_OFFSET) #define SPI_TMOD_TR 0x0 /* xmit & recv */ #define SPI_TMOD_TO 0x1 /* xmit only */ #define SPI_TMOD_RO 0x2 /* recv only */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 569dc72..85f38a63 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -30,7 +30,7 @@ struct rpc_inode; * The high-level client handle */ struct rpc_clnt { - struct kref cl_kref; /* Number of references */ + atomic_t cl_count; /* Number of references */ struct list_head cl_clients; /* Global list of clients */ struct list_head cl_tasks; /* List of tasks */ spinlock_t cl_lock; /* spinlock */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 2fee51a..7cdd633 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -19,6 +19,7 @@ struct bio; #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ #define SWAP_FLAG_PRIO_MASK 0x7fff #define SWAP_FLAG_PRIO_SHIFT 0 +#define SWAP_FLAG_DISCARD 0x10000 /* discard swap cluster after use */ static inline int current_is_kswapd(void) { @@ -142,7 +143,7 @@ struct swap_extent { enum { SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ SWP_WRITEOK = (1 << 1), /* ok to write to this swap? 
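 *
 * (Editorial note on the change below: with SWAP_FLAG_DISCARD added
 * above, SWP_DISCARDABLE is expected to be set only when swapon
 * requested discard and the backing device supports it, hence the
 * new "swapon+blkdev" wording.)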
*/ - SWP_DISCARDABLE = (1 << 2), /* blkdev supports discard */ + SWP_DISCARDABLE = (1 << 2), /* swapon+blkdev support discard */ SWP_DISCARDING = (1 << 3), /* now discarding a free cluster */ SWP_SOLIDSTATE = (1 << 4), /* blkdev seeks are cheap */ SWP_CONTINUED = (1 << 5), /* swap_map has count continuation */ @@ -315,6 +316,7 @@ extern long nr_swap_pages; extern long total_swap_pages; extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(void); +extern swp_entry_t get_swap_page_of_type(int); extern int valid_swaphandles(swp_entry_t, unsigned long *); extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t); @@ -331,13 +333,6 @@ extern int reuse_swap_page(struct page *); extern int try_to_free_swap(struct page *); struct backing_dev_info; -#ifdef CONFIG_HIBERNATION -void hibernation_freeze_swap(void); -void hibernation_thaw_swap(void); -swp_entry_t get_swap_for_hibernation(int type); -void swap_free_for_hibernation(swp_entry_t val); -#endif - /* linux/mm/thrash.c */ extern struct mm_struct *swap_token_mm; extern void grab_swap_token(struct mm_struct *); diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 7f43ccd..eaaea37 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -170,6 +170,28 @@ static inline unsigned long zone_page_state(struct zone *zone, return x; } +/* + * More accurate version that also considers the currently pending + * deltas. For that we need to loop over all cpus to find the current + * deltas. There is no synchronization so the result cannot be + * exactly accurate either. + */ +static inline unsigned long zone_page_state_snapshot(struct zone *zone, + enum zone_stat_item item) +{ + long x = atomic_long_read(&zone->vm_stat[item]); + +#ifdef CONFIG_SMP + int cpu; + for_each_online_cpu(cpu) + x += per_cpu_ptr(zone->pageset, cpu)->vm_stat_diff[item]; + + if (x < 0) + x = 0; +#endif + return x; +} + extern unsigned long global_reclaimable_pages(void); extern unsigned long zone_reclaimable_pages(struct zone *zone); diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 4f9d277..25e02c9 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -25,18 +25,20 @@ typedef void (*work_func_t)(struct work_struct *work); enum { WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */ - WORK_STRUCT_CWQ_BIT = 1, /* data points to cwq */ - WORK_STRUCT_LINKED_BIT = 2, /* next work is linked to this one */ + WORK_STRUCT_DELAYED_BIT = 1, /* work item is delayed */ + WORK_STRUCT_CWQ_BIT = 2, /* data points to cwq */ + WORK_STRUCT_LINKED_BIT = 3, /* next work is linked to this one */ #ifdef CONFIG_DEBUG_OBJECTS_WORK - WORK_STRUCT_STATIC_BIT = 3, /* static initializer (debugobjects) */ - WORK_STRUCT_COLOR_SHIFT = 4, /* color for workqueue flushing */ + WORK_STRUCT_STATIC_BIT = 4, /* static initializer (debugobjects) */ + WORK_STRUCT_COLOR_SHIFT = 5, /* color for workqueue flushing */ #else - WORK_STRUCT_COLOR_SHIFT = 3, /* color for workqueue flushing */ + WORK_STRUCT_COLOR_SHIFT = 4, /* color for workqueue flushing */ #endif WORK_STRUCT_COLOR_BITS = 4, WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, + WORK_STRUCT_DELAYED = 1 << WORK_STRUCT_DELAYED_BIT, WORK_STRUCT_CWQ = 1 << WORK_STRUCT_CWQ_BIT, WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT, #ifdef CONFIG_DEBUG_OBJECTS_WORK @@ -59,8 +61,8 @@ enum { /* * Reserve 7 bits off of cwq pointer w/ debugobjects turned - * off. 
This makes cwqs aligned to 128 bytes which isn't too - * excessive while allowing 15 workqueue flush colors. + * off. This makes cwqs aligned to 256 bytes and allows 15 + * workqueue flush colors. */ WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT + WORK_STRUCT_COLOR_BITS, @@ -233,6 +235,10 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } #define work_clear_pending(work) \ clear_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) +/* + * Workqueue flags and constants. For details, please refer to + * Documentation/workqueue.txt. + */ enum { WQ_NON_REENTRANT = 1 << 0, /* guarantee non-reentrance */ WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ @@ -241,6 +247,8 @@ enum { WQ_HIGHPRI = 1 << 4, /* high priority */ WQ_CPU_INTENSIVE = 1 << 5, /* cpu intensive workqueue */ + WQ_DYING = 1 << 6, /* internal: workqueue is dying */ + WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index 726cc35..ef6c24a 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -27,11 +27,17 @@ struct cgroup_cls_state #ifdef CONFIG_NET_CLS_CGROUP static inline u32 task_cls_classid(struct task_struct *p) { + int classid; + if (in_interrupt()) return 0; - return container_of(task_subsys_state(p, net_cls_subsys_id), - struct cgroup_cls_state, css)->classid; + rcu_read_lock(); + classid = container_of(task_subsys_state(p, net_cls_subsys_id), + struct cgroup_cls_state, css)->classid; + rcu_read_unlock(); + + return classid; } #else extern int net_cls_subsys_id; diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a4747a0..f976885 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -955,6 +955,9 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) return csum_partial(diff, sizeof(diff), oldsum); } +extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, + int outin); + #endif /* __KERNEL__ */ #endif /* _NET_IP_VS_H */ diff --git a/include/net/sock.h b/include/net/sock.h index ac53bfb..adab9dc 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -752,6 +752,7 @@ struct proto { /* Keeping track of sk's, looking them up, and port selection methods. */ void (*hash)(struct sock *sk); void (*unhash)(struct sock *sk); + void (*rehash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); /* Keeping track of sockets in use */ diff --git a/include/net/tcp.h b/include/net/tcp.h index eaa9582..3e4b33e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -475,8 +475,22 @@ extern unsigned int tcp_current_mss(struct sock *sk); /* Bound MSS / TSO packet size to half of the window */ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) { - if (tp->max_window && pktsize > (tp->max_window >> 1)) - return max(tp->max_window >> 1, 68U - tp->tcp_header_len); + int cutoff; + + /* When peer uses tiny windows, there is no use in packetizing + * to sub-MSS pieces for the sake of SWS or making sure there + * are enough packets in the pipe for fast recovery. + * + * On the other hand, for extremely large MSS devices, handling + * smaller than MSS windows in this way does make sense. 
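To make the renumbered work_struct flag bits in the workqueue.h hunk above concrete, the layout with CONFIG_DEBUG_OBJECTS_WORK disabled works out as follows (a worked illustration, not part of the patch):

    /*
     * WORK_STRUCT_PENDING_BIT = 0, DELAYED_BIT = 1, CWQ_BIT = 2,
     * LINKED_BIT = 3, COLOR_SHIFT = 4, COLOR_BITS = 4, so
     * WORK_STRUCT_FLAG_BITS = 4 + 4 = 8: the cwq pointer stored in
     * work->data must be aligned to 1 << 8 = 256 bytes, and the four
     * color bits give 2^4 - 1 = 15 usable flush colors (one value is
     * reserved as WORK_NO_COLOR).
     */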
+ */ if (tp->max_window >= 512) + cutoff = (tp->max_window >> 1); + else + cutoff = tp->max_window; + + if (cutoff && pktsize > cutoff) + return max_t(int, cutoff, 68U - tp->tcp_header_len); else return pktsize; } diff --git a/include/net/udp.h b/include/net/udp.h index 7abdf30..a184d34 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -151,6 +151,7 @@ static inline void udp_lib_hash(struct sock *sk) } extern void udp_lib_unhash(struct sock *sk); +extern void udp_lib_rehash(struct sock *sk, u16 new_hash); static inline void udp_lib_close(struct sock *sk, long timeout) { diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index 8fcb6e0..d63533a 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -32,6 +32,12 @@ struct scsi_cmnd; #endif /* + * DIX-capable adapters effectively support infinite chaining for the + * protection information scatterlist + */ +#define SCSI_MAX_PROT_SG_SEGMENTS 0xFFFF + +/* * Special value for scanning to specify scanning or rescanning of all * possible channels, (target) ids, or luns on a given shost. */ diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index b7bdecb..d0a6a84 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -388,6 +388,7 @@ struct scsi_host_template { * of scatter-gather. */ unsigned short sg_tablesize; + unsigned short sg_prot_tablesize; /* * Set this if the host adapter has limitations beside segment count. @@ -599,6 +600,7 @@ struct Scsi_Host { int can_queue; short cmd_per_lun; short unsigned int sg_tablesize; + short unsigned int sg_prot_tablesize; short unsigned int max_sectors; unsigned long dma_boundary; /* @@ -823,6 +825,11 @@ static inline unsigned int scsi_host_get_prot(struct Scsi_Host *shost) return shost->prot_capabilities; } +static inline int scsi_host_prot_dma(struct Scsi_Host *shost) +{ + return shost->prot_capabilities >= SHOST_DIX_TYPE0_PROTECTION; +} + static inline unsigned int scsi_host_dif_capable(struct Scsi_Host *shost, unsigned int target_type) { static unsigned char cap[] = { 0, diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h index 1723138..d6e7994 100644 --- a/include/scsi/scsi_tcq.h +++ b/include/scsi/scsi_tcq.h @@ -97,13 +97,9 @@ static inline void scsi_deactivate_tcq(struct scsi_device *sdev, int depth) static inline int scsi_populate_tag_msg(struct scsi_cmnd *cmd, char *msg) { struct request *req = cmd->request; - struct scsi_device *sdev = cmd->device; if (blk_rq_tagged(req)) { - if (sdev->ordered_tags && req->cmd_flags & REQ_HARDBARRIER) - *msg++ = MSG_ORDERED_TAG; - else - *msg++ = MSG_SIMPLE_TAG; + *msg++ = MSG_SIMPLE_TAG; *msg++ = req->tag; return 2; } diff --git a/init/Kconfig b/init/Kconfig index 2de5b1c..950ba26 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -634,11 +634,14 @@ config BLK_CGROUP Currently, CFQ IO scheduler uses it to recognize task groups and control disk bandwidth allocation (proportional time slice allocation) - to such task groups. + to such task groups. It is also used by the bio throttling logic in + the block layer to implement upper limits on IO rates on a device. This option only enables generic Block IO controller infrastructure. - One needs to also enable actual IO controlling logic in CFQ for it - to take effect. (CONFIG_CFQ_GROUP_IOSCHED=y). + One needs to also enable actual IO controlling logic/policy. For + enabling proportional weight division of disk bandwidth in CFQ set + CONFIG_CFQ_GROUP_IOSCHED=y and for enabling the throttling policy set + CONFIG_BLK_THROTTLE=y. 
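A worked example for the tcp_bound_to_half_wnd() cutoff logic completed at the start of this hunk (the window and MSS values are illustrative):

    /*
     * max_window = 400 (< 512): cutoff = 400, so a 1460-byte MSS is
     * bounded to max(400, 68 - tcp_header_len): the tiny window is
     * used whole rather than split into sub-MSS pieces.
     * max_window = 65535 (>= 512): cutoff = 65535 >> 1 = 32767, so a
     * large TSO packet is still bounded to half the window, as before.
     */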
See Documentation/cgroups/blkio-controller.txt for more information. diff --git a/init/do_mounts.c b/init/do_mounts.c index 02e3ca4..42db055 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -58,6 +58,62 @@ static int __init readwrite(char *str) __setup("ro", readonly); __setup("rw", readwrite); +#ifdef CONFIG_BLOCK +/** + * match_dev_by_uuid - callback for finding a partition using its uuid + * @dev: device passed in by the caller + * @data: opaque pointer to a 36 byte char array with a UUID + * + * Returns 1 if the device matches, and 0 otherwise. + */ +static int match_dev_by_uuid(struct device *dev, void *data) +{ + u8 *uuid = data; + struct hd_struct *part = dev_to_part(dev); + + if (!part->info) + goto no_match; + + if (memcmp(uuid, part->info->uuid, sizeof(part->info->uuid))) + goto no_match; + + return 1; +no_match: + return 0; +} + + +/** + * devt_from_partuuid - looks up the dev_t of a partition by its UUID + * @uuid_str: 36 byte char array containing a hex ascii UUID + * + * The function will return the first partition which contains a matching + * UUID value in its partition_meta_info struct. This does not search + * by filesystem UUIDs. + * + * Returns the matching dev_t on success or 0 on failure. + */ +static dev_t __init devt_from_partuuid(char *uuid_str) +{ + dev_t res = 0; + struct device *dev = NULL; + u8 uuid[16]; + + /* Pack the requested UUID in the expected format. */ + part_pack_uuid(uuid_str, uuid); + + dev = class_find_device(&block_class, NULL, uuid, &match_dev_by_uuid); + if (!dev) + goto done; + + res = dev->devt; + put_device(dev); + +done: + return res; +} +#endif + /* * Convert a name into device number. We accept the following variants: * @@ -68,6 +124,8 @@ __setup("rw", readwrite); * of partition - device number of disk plus the partition number * 5) /dev/<disk_name>p<decimal> - same as the above, that form is * used when disk name of partitioned disk ends on a digit. + * 6) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the + * unique id of a partition if the partition table provides it. * * If name doesn't fall into the categories above, we return (0,0). * block_class is used to check if something is a disk name. 
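As a usage sketch for variant 6) above, the new syntax would typically be handed to name_to_dev_t() via the root= boot parameter (the UUID is the same arbitrary example used in the comment):

    root=PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF ro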
If the disk @@ -82,6 +140,18 @@ dev_t name_to_dev_t(char *name) dev_t res = 0; int part; +#ifdef CONFIG_BLOCK + if (strncmp(name, "PARTUUID=", 9) == 0) { + name += 9; + if (strlen(name) != 36) + goto fail; + res = devt_from_partuuid(name); + if (!res) + goto fail; + goto done; + } +#endif + if (strncmp(name, "/dev/", 5) != 0) { unsigned maj, min; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 192f88c..c9483d8 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1791,19 +1791,20 @@ out: } /** - * cgroup_attach_task_current_cg - attach task 'tsk' to current task's cgroup + * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from' + * @from: attach to all cgroups of a given task * @tsk: the task to be attached */ -int cgroup_attach_task_current_cg(struct task_struct *tsk) +int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) { struct cgroupfs_root *root; - struct cgroup *cur_cg; int retval = 0; cgroup_lock(); for_each_active_root(root) { - cur_cg = task_cgroup_from_root(current, root); - retval = cgroup_attach_task(cur_cg, tsk); + struct cgroup *from_cg = task_cgroup_from_root(from, root); + + retval = cgroup_attach_task(from_cg, tsk); if (retval) break; } @@ -1811,7 +1812,7 @@ int cgroup_attach_task_current_cg(struct task_struct *tsk) return retval; } -EXPORT_SYMBOL_GPL(cgroup_attach_task_current_cg); +EXPORT_SYMBOL_GPL(cgroup_attach_task_all); /* * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex diff --git a/kernel/compat.c b/kernel/compat.c index e167efc..c9e2ec0 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -1126,3 +1126,24 @@ compat_sys_sysinfo(struct compat_sysinfo __user *info) return 0; } + +/* + * Allocate user-space memory for the duration of a single system call, + * in order to marshall parameters inside a compat thunk. + */ +void __user *compat_alloc_user_space(unsigned long len) +{ + void __user *ptr; + + /* If len would occupy more than half of the entire compat space... */ + if (unlikely(len > (((compat_uptr_t)~0) >> 1))) + return NULL; + + ptr = arch_compat_alloc_user_space(len); + + if (unlikely(!access_ok(VERIFY_WRITE, ptr, len))) + return NULL; + + return ptr; +} +EXPORT_SYMBOL_GPL(compat_alloc_user_space); diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c index 75bd9b3..20059ef 100644 --- a/kernel/debug/kdb/kdb_bp.c +++ b/kernel/debug/kdb/kdb_bp.c @@ -274,7 +274,6 @@ static int kdb_bp(int argc, const char **argv) int i, bpno; kdb_bp_t *bp, *bp_check; int diag; - int free; char *symname = NULL; long offset = 0ul; int nextarg; @@ -305,7 +304,6 @@ static int kdb_bp(int argc, const char **argv) /* * Find an empty bp structure to allocate */ - free = KDB_MAXBPT; for (bpno = 0, bp = kdb_breakpoints; bpno < KDB_MAXBPT; bpno++, bp++) { if (bp->bp_free) break; diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c index ef3c3f8..f83972b 100644 --- a/kernel/gcov/fs.c +++ b/kernel/gcov/fs.c @@ -33,10 +33,11 @@ * @children: child nodes * @all: list head for list of all nodes * @parent: parent node - * @info: associated profiling data structure if not a directory - * @ghost: when an object file containing profiling data is unloaded we keep a - * copy of the profiling data here to allow collecting coverage data - * for cleanup code. Such a node is called a "ghost". + * @loaded_info: array of pointers to profiling data sets for loaded object + * files. + * @num_loaded: number of profiling data sets for loaded object files. 
+ * @unloaded_info: accumulated copy of profiling data sets for unloaded + * object files. Used only when gcov_persist=1. * @dentry: main debugfs entry, either a directory or data file * @links: associated symbolic links * @name: data file basename @@ -51,10 +52,11 @@ struct gcov_node { struct list_head children; struct list_head all; struct gcov_node *parent; - struct gcov_info *info; - struct gcov_info *ghost; + struct gcov_info **loaded_info; + struct gcov_info *unloaded_info; struct dentry *dentry; struct dentry **links; + int num_loaded; char name[0]; }; @@ -136,16 +138,37 @@ static const struct seq_operations gcov_seq_ops = { }; /* - * Return the profiling data set for a given node. This can either be the - * original profiling data structure or a duplicate (also called "ghost") - * in case the associated object file has been unloaded. + * Return a profiling data set associated with the given node. This is + * either a data set for a loaded object file or a data set copy in case + * all associated object files have been unloaded. */ static struct gcov_info *get_node_info(struct gcov_node *node) { - if (node->info) - return node->info; + if (node->num_loaded > 0) + return node->loaded_info[0]; - return node->ghost; + return node->unloaded_info; +} + +/* + * Return a newly allocated profiling data set which contains the sum of + * all profiling data associated with the given node. + */ +static struct gcov_info *get_accumulated_info(struct gcov_node *node) +{ + struct gcov_info *info; + int i = 0; + + if (node->unloaded_info) + info = gcov_info_dup(node->unloaded_info); + else + info = gcov_info_dup(node->loaded_info[i++]); + if (!info) + return NULL; + for (; i < node->num_loaded; i++) + gcov_info_add(info, node->loaded_info[i]); + + return info; } /* @@ -163,9 +186,10 @@ static int gcov_seq_open(struct inode *inode, struct file *file) mutex_lock(&node_lock); /* * Read from a profiling data copy to minimize reference tracking - * complexity and concurrent access. + * complexity and concurrent access and to keep accumulating multiple + * profiling data sets associated with one node simple. */ - info = gcov_info_dup(get_node_info(node)); + info = get_accumulated_info(node); if (!info) goto out_unlock; iter = gcov_iter_new(info); @@ -225,12 +249,25 @@ static struct gcov_node *get_node_by_name(const char *name) return NULL; } +/* + * Reset all profiling data associated with the specified node. + */ +static void reset_node(struct gcov_node *node) +{ + int i; + + if (node->unloaded_info) + gcov_info_reset(node->unloaded_info); + for (i = 0; i < node->num_loaded; i++) + gcov_info_reset(node->loaded_info[i]); +} + static void remove_node(struct gcov_node *node); /* * write() implementation for gcov data files. Reset profiling data for the - * associated file. If the object file has been unloaded (i.e. this is - * a "ghost" node), remove the debug fs node as well. + * corresponding file. If all associated object files have been unloaded, + * remove the debug fs node as well. */ static ssize_t gcov_seq_write(struct file *file, const char __user *addr, size_t len, loff_t *pos) @@ -245,10 +282,10 @@ static ssize_t gcov_seq_write(struct file *file, const char __user *addr, node = get_node_by_name(info->filename); if (node) { /* Reset counts or remove node for unloaded modules. */ - if (node->ghost) + if (node->num_loaded == 0) remove_node(node); else - gcov_info_reset(node->info); + reset_node(node); } /* Reset counts for open file. 
*/ gcov_info_reset(info); @@ -378,7 +415,10 @@ static void init_node(struct gcov_node *node, struct gcov_info *info, INIT_LIST_HEAD(&node->list); INIT_LIST_HEAD(&node->children); INIT_LIST_HEAD(&node->all); - node->info = info; + if (node->loaded_info) { + node->loaded_info[0] = info; + node->num_loaded = 1; + } node->parent = parent; if (name) strcpy(node->name, name); @@ -394,9 +434,13 @@ static struct gcov_node *new_node(struct gcov_node *parent, struct gcov_node *node; node = kzalloc(sizeof(struct gcov_node) + strlen(name) + 1, GFP_KERNEL); - if (!node) { - pr_warning("out of memory\n"); - return NULL; + if (!node) + goto err_nomem; + if (info) { + node->loaded_info = kcalloc(1, sizeof(struct gcov_info *), + GFP_KERNEL); + if (!node->loaded_info) + goto err_nomem; } init_node(node, info, name, parent); /* Differentiate between gcov data file nodes and directory nodes. */ @@ -416,6 +460,11 @@ static struct gcov_node *new_node(struct gcov_node *parent, list_add(&node->all, &all_head); return node; + +err_nomem: + kfree(node); + pr_warning("out of memory\n"); + return NULL; } /* Remove symbolic links associated with node. */ @@ -441,8 +490,9 @@ static void release_node(struct gcov_node *node) list_del(&node->all); debugfs_remove(node->dentry); remove_links(node); - if (node->ghost) - gcov_info_free(node->ghost); + kfree(node->loaded_info); + if (node->unloaded_info) + gcov_info_free(node->unloaded_info); kfree(node); } @@ -477,7 +527,7 @@ static struct gcov_node *get_child_by_name(struct gcov_node *parent, /* * write() implementation for reset file. Reset all profiling data to zero - * and remove ghost nodes. + * and remove nodes for which all associated object files are unloaded. */ static ssize_t reset_write(struct file *file, const char __user *addr, size_t len, loff_t *pos) @@ -487,8 +537,8 @@ static ssize_t reset_write(struct file *file, const char __user *addr, mutex_lock(&node_lock); restart: list_for_each_entry(node, &all_head, all) { - if (node->info) - gcov_info_reset(node->info); + if (node->num_loaded > 0) + reset_node(node); else if (list_empty(&node->children)) { remove_node(node); /* Several nodes may have gone - restart loop. */ @@ -564,37 +614,115 @@ err_remove: } /* - * The profiling data set associated with this node is being unloaded. Store a - * copy of the profiling data and turn this node into a "ghost". + * Associate a profiling data set with an existing node. Needs to be called + * with node_lock held. */ -static int ghost_node(struct gcov_node *node) +static void add_info(struct gcov_node *node, struct gcov_info *info) { - node->ghost = gcov_info_dup(node->info); - if (!node->ghost) { - pr_warning("could not save data for '%s' (out of memory)\n", - node->info->filename); - return -ENOMEM; + struct gcov_info **loaded_info; + int num = node->num_loaded; + + /* + * Prepare new array. This is done first to simplify cleanup in + * case the new data set is incompatible, the node only contains + * unloaded data sets and there's not enough memory for the array. + */ + loaded_info = kcalloc(num + 1, sizeof(struct gcov_info *), GFP_KERNEL); + if (!loaded_info) { + pr_warning("could not add '%s' (out of memory)\n", + info->filename); + return; + } + memcpy(loaded_info, node->loaded_info, + num * sizeof(struct gcov_info *)); + loaded_info[num] = info; + /* Check if the new data set is compatible. */ + if (num == 0) { + /* + * A module was unloaded, modified and reloaded. The new + * data set replaces the copy of the last one. 
+ */ + if (!gcov_info_is_compatible(node->unloaded_info, info)) { + pr_warning("discarding saved data for %s " + "(incompatible version)\n", info->filename); + gcov_info_free(node->unloaded_info); + node->unloaded_info = NULL; + } + } else { + /* + * Two different versions of the same object file are loaded. + * The initial one takes precedence. + */ + if (!gcov_info_is_compatible(node->loaded_info[0], info)) { + pr_warning("could not add '%s' (incompatible " + "version)\n", info->filename); + kfree(loaded_info); + return; + } } - node->info = NULL; + /* Overwrite previous array. */ + kfree(node->loaded_info); + node->loaded_info = loaded_info; + node->num_loaded = num + 1; +} - return 0; +/* + * Return the index of a profiling data set associated with a node. + */ +static int get_info_index(struct gcov_node *node, struct gcov_info *info) +{ + int i; + + for (i = 0; i < node->num_loaded; i++) { + if (node->loaded_info[i] == info) + return i; + } + return -ENOENT; } /* - * Profiling data for this node has been loaded again. Add profiling data - * from previous instantiation and turn this node into a regular node. + * Save the data of a profiling data set which is being unloaded. */ -static void revive_node(struct gcov_node *node, struct gcov_info *info) +static void save_info(struct gcov_node *node, struct gcov_info *info) { - if (gcov_info_is_compatible(node->ghost, info)) - gcov_info_add(info, node->ghost); + if (node->unloaded_info) + gcov_info_add(node->unloaded_info, info); else { - pr_warning("discarding saved data for '%s' (version changed)\n", + node->unloaded_info = gcov_info_dup(info); + if (!node->unloaded_info) { + pr_warning("could not save data for '%s' " + "(out of memory)\n", info->filename); + } + } +} + +/* + * Disassociate a profiling data set from a node. Needs to be called with + * node_lock held. + */ +static void remove_info(struct gcov_node *node, struct gcov_info *info) +{ + int i; + + i = get_info_index(node, info); + if (i < 0) { + pr_warning("could not remove '%s' (not found)\n", info->filename); + return; } - gcov_info_free(node->ghost); - node->ghost = NULL; - node->info = info; + if (gcov_persist) + save_info(node, info); + /* Shrink array. */ + node->loaded_info[i] = node->loaded_info[node->num_loaded - 1]; + node->num_loaded--; + if (node->num_loaded > 0) + return; + /* Last loaded data set was removed. */ + kfree(node->loaded_info); + node->loaded_info = NULL; + node->num_loaded = 0; + if (!node->unloaded_info) + remove_node(node); } /* @@ -609,30 +737,18 @@ void gcov_event(enum gcov_action action, struct gcov_info *info) node = get_node_by_name(info->filename); switch (action) { case GCOV_ADD: - /* Add new node or revive ghost. */ - if (!node) { + if (node) + add_info(node, info); + else add_node(info); - break; - } - if (gcov_persist) - revive_node(node, info); - else { - pr_warning("could not add '%s' (already exists)\n", - info->filename); - } break; case GCOV_REMOVE: - /* Remove node or turn into ghost. 
*/ - if (!node) { + if (node) + remove_info(node, info); + else { pr_warning("could not remove '%s' (not found)\n", info->filename); - break; } - if (gcov_persist) { - if (!ghost_node(node)) - break; - } - remove_node(node); break; } mutex_unlock(&node_lock); diff --git a/kernel/groups.c b/kernel/groups.c index 53b1916..253dc0f 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -143,10 +143,9 @@ int groups_search(const struct group_info *group_info, gid_t grp) right = group_info->ngroups; while (left < right) { unsigned int mid = (left+right)/2; - int cmp = grp - GROUP_AT(group_info, mid); - if (cmp > 0) + if (grp > GROUP_AT(group_info, mid)) left = mid + 1; - else if (cmp < 0) + else if (grp < GROUP_AT(group_info, mid)) right = mid; else return 1; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index ce66917..1decafb 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1091,11 +1091,10 @@ EXPORT_SYMBOL_GPL(hrtimer_cancel); */ ktime_t hrtimer_get_remaining(const struct hrtimer *timer) { - struct hrtimer_clock_base *base; unsigned long flags; ktime_t rem; - base = lock_hrtimer_base(timer, &flags); + lock_hrtimer_base(timer, &flags); rem = hrtimer_expires_remaining(timer); unlock_hrtimer_base(timer, &flags); diff --git a/kernel/mutex.c b/kernel/mutex.c index 4c0b7b3..200407c 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -36,15 +36,6 @@ # include #endif -/*** - * mutex_init - initialize the mutex - * @lock: the mutex to be initialized - * @key: the lock_class_key for the class; used by mutex lock debugging - * - * Initialize the mutex to unlocked state. - * - * It is not allowed to initialize an already locked mutex. - */ void __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) { @@ -68,7 +59,7 @@ EXPORT_SYMBOL(__mutex_init); static __used noinline void __sched __mutex_lock_slowpath(atomic_t *lock_count); -/*** +/** * mutex_lock - acquire the mutex * @lock: the mutex to be acquired * @@ -105,7 +96,7 @@ EXPORT_SYMBOL(mutex_lock); static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); -/*** +/** * mutex_unlock - release the mutex * @lock: the mutex to be released * @@ -364,8 +355,8 @@ __mutex_lock_killable_slowpath(atomic_t *lock_count); static noinline int __sched __mutex_lock_interruptible_slowpath(atomic_t *lock_count); -/*** - * mutex_lock_interruptible - acquire the mutex, interruptable +/** + * mutex_lock_interruptible - acquire the mutex, interruptible * @lock: the mutex to be acquired * * Lock the mutex like mutex_lock(), and return 0 if the mutex has @@ -456,15 +447,15 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count) return prev == 1; } -/*** - * mutex_trylock - try acquire the mutex, without waiting +/** + * mutex_trylock - try to acquire the mutex, without waiting * @lock: the mutex to be acquired * * Try to acquire the mutex atomically. Returns 1 if the mutex * has been acquired successfully, and 0 on contention. * * NOTE: this function follows the spin_trylock() convention, so - * it is negated to the down_trylock() return values! Be careful + * it is negated from the down_trylock() return values! Be careful * about this when converting semaphore users to mutexes. * * This function must not be used in interrupt context. 
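The kernel-doc fixes in the mutex.c hunk above turn on the inverted return conventions; a brief sketch contrasting them (my_mutex and my_sem are hypothetical):

    if (mutex_trylock(&my_mutex)) {         /* 1 means acquired */
            /* ... */
            mutex_unlock(&my_mutex);
    }

    if (down_trylock(&my_sem) == 0) {       /* 0 means acquired */
            /* ... */
            up(&my_sem);
    }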
The diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 403d180..db5b560 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -402,11 +402,31 @@ static void perf_group_detach(struct perf_event *event) } } +static inline int +event_filter_match(struct perf_event *event) +{ + return event->cpu == -1 || event->cpu == smp_processor_id(); +} + static void event_sched_out(struct perf_event *event, struct perf_cpu_context *cpuctx, struct perf_event_context *ctx) { + u64 delta; + /* + * An event which could not be activated because of + * filter mismatch still needs to have its timings + * maintained, otherwise bogus information is returned + * via read() for time_enabled, time_running: + */ + if (event->state == PERF_EVENT_STATE_INACTIVE + && !event_filter_match(event)) { + delta = ctx->time - event->tstamp_stopped; + event->tstamp_running += delta; + event->tstamp_stopped = ctx->time; + } + if (event->state != PERF_EVENT_STATE_ACTIVE) return; @@ -432,9 +452,7 @@ group_sched_out(struct perf_event *group_event, struct perf_event_context *ctx) { struct perf_event *event; - - if (group_event->state != PERF_EVENT_STATE_ACTIVE) - return; + int state = group_event->state; event_sched_out(group_event, cpuctx, ctx); @@ -444,7 +462,7 @@ group_sched_out(struct perf_event *group_event, list_for_each_entry(event, &group_event->sibling_list, group_entry) event_sched_out(event, cpuctx, ctx); - if (group_event->attr.exclusive) + if (state == PERF_EVENT_STATE_ACTIVE && group_event->attr.exclusive) cpuctx->exclusive = 0; } @@ -5743,15 +5761,15 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { unsigned int cpu = (long)hcpu; - switch (action) { + switch (action & ~CPU_TASKS_FROZEN) { case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: + case CPU_DOWN_FAILED: perf_event_init_cpu(cpu); break; + case CPU_UP_CANCELED: case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: perf_event_exit_cpu(cpu); break; diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index b7e4c36..645e541 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c @@ -389,10 +389,12 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, } else if (count == 11) { /* len('0x12345678/0') */ if (copy_from_user(ascii_value, buf, 11)) return -EFAULT; + if (strlen(ascii_value) != 10) + return -EINVAL; x = sscanf(ascii_value, "%x", &value); if (x != 1) return -EINVAL; - pr_debug(KERN_ERR "%s, %d, 0x%x\n", ascii_value, x, value); + pr_debug("%s, %d, 0x%x\n", ascii_value, x, value); } else return -EINVAL; diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index c779639..8dc31e0 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -338,7 +338,6 @@ int hibernation_snapshot(int platform_mode) goto Close; suspend_console(); - hibernation_freeze_swap(); saved_mask = clear_gfp_allowed_mask(GFP_IOFS); error = dpm_suspend_start(PMSG_FREEZE); if (error) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 5e7edfb..d3f795f 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1086,7 +1086,6 @@ void swsusp_free(void) buffer = NULL; alloc_normal = 0; alloc_highmem = 0; - hibernation_thaw_swap(); } /* Helper functions used for the shrinking of memory. 
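An illustrative timeline for the event_sched_out() timing fix in the perf_event.c hunk above (the timestamps are made up):

    /*
     * ctx->time advances 100 -> 160 while the event stays INACTIVE
     * because its CPU filter never matched. At sched-out:
     *   delta = 160 - tstamp_stopped(100) = 60
     *   tstamp_running += 60; tstamp_stopped = 160;
     * so a later read() reports consistent time_enabled/time_running
     * instead of values frozen at the last activation attempt.
     */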
*/ @@ -1122,9 +1121,19 @@ static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask) return nr_alloc; } -static unsigned long preallocate_image_memory(unsigned long nr_pages) +static unsigned long preallocate_image_memory(unsigned long nr_pages, + unsigned long avail_normal) { - return preallocate_image_pages(nr_pages, GFP_IMAGE); + unsigned long alloc; + + if (avail_normal <= alloc_normal) + return 0; + + alloc = avail_normal - alloc_normal; + if (nr_pages < alloc) + alloc = nr_pages; + + return preallocate_image_pages(alloc, GFP_IMAGE); } #ifdef CONFIG_HIGHMEM @@ -1170,15 +1179,22 @@ static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages, */ static void free_unnecessary_pages(void) { - unsigned long save_highmem, to_free_normal, to_free_highmem; + unsigned long save, to_free_normal, to_free_highmem; - to_free_normal = alloc_normal - count_data_pages(); - save_highmem = count_highmem_pages(); - if (alloc_highmem > save_highmem) { - to_free_highmem = alloc_highmem - save_highmem; + save = count_data_pages(); + if (alloc_normal >= save) { + to_free_normal = alloc_normal - save; + save = 0; + } else { + to_free_normal = 0; + save -= alloc_normal; + } + save += count_highmem_pages(); + if (alloc_highmem >= save) { + to_free_highmem = alloc_highmem - save; } else { to_free_highmem = 0; - to_free_normal -= save_highmem - alloc_highmem; + to_free_normal -= save - alloc_highmem; } memory_bm_position_reset(©_bm); @@ -1259,7 +1275,7 @@ int hibernate_preallocate_memory(void) { struct zone *zone; unsigned long saveable, size, max_size, count, highmem, pages = 0; - unsigned long alloc, save_highmem, pages_highmem; + unsigned long alloc, save_highmem, pages_highmem, avail_normal; struct timeval start, stop; int error; @@ -1296,6 +1312,7 @@ int hibernate_preallocate_memory(void) else count += zone_page_state(zone, NR_FREE_PAGES); } + avail_normal = count; count += highmem; count -= totalreserve_pages; @@ -1310,12 +1327,21 @@ int hibernate_preallocate_memory(void) */ if (size >= saveable) { pages = preallocate_image_highmem(save_highmem); - pages += preallocate_image_memory(saveable - pages); + pages += preallocate_image_memory(saveable - pages, avail_normal); goto out; } /* Estimate the minimum size of the image. */ pages = minimum_image_size(saveable); + /* + * To avoid excessive pressure on the normal zone, leave room in it to + * accommodate an image of the minimum size (unless it's already too + * small, in which case don't preallocate pages from it at all). + */ + if (avail_normal > pages) + avail_normal -= pages; + else + avail_normal = 0; if (size < pages) size = min_t(unsigned long, pages, max_size); @@ -1336,16 +1362,34 @@ int hibernate_preallocate_memory(void) */ pages_highmem = preallocate_image_highmem(highmem / 2); alloc = (count - max_size) - pages_highmem; - pages = preallocate_image_memory(alloc); - if (pages < alloc) - goto err_out; - size = max_size - size; - alloc = size; - size = preallocate_highmem_fraction(size, highmem, count); - pages_highmem += size; - alloc -= size; - pages += preallocate_image_memory(alloc); - pages += pages_highmem; + pages = preallocate_image_memory(alloc, avail_normal); + if (pages < alloc) { + /* We have exhausted non-highmem pages, try highmem. 
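A worked example for the avail_normal clamp introduced in preallocate_image_memory() above (illustrative page counts):

    /*
     * avail_normal = 1000, alloc_normal = 400 -> at most 600 more
     * normal pages may be taken:
     *   nr_pages = 800 -> request clamped to 600;
     *   nr_pages = 500 -> all 500 requested.
     * Once alloc_normal >= avail_normal the function returns 0 instead
     * of digging deeper into the normal zone.
     */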
*/ + alloc -= pages; + pages += pages_highmem; + pages_highmem = preallocate_image_highmem(alloc); + if (pages_highmem < alloc) + goto err_out; + pages += pages_highmem; + /* + * size is the desired number of saveable pages to leave in + * memory, so try to preallocate (all memory - size) pages. + */ + alloc = (count - pages) - size; + pages += preallocate_image_highmem(alloc); + } else { + /* + * There are approximately max_size saveable pages at this point + * and we want to reduce this number down to size. + */ + alloc = max_size - size; + size = preallocate_highmem_fraction(alloc, highmem, count); + pages_highmem += size; + alloc -= size; + size = preallocate_image_memory(alloc, avail_normal); + pages_highmem += preallocate_image_highmem(alloc - size); + pages += pages_highmem + size; + } /* * We only need as many page frames for the image as there are saveable diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 5d0059e..e6a5bdf 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -136,10 +136,10 @@ sector_t alloc_swapdev_block(int swap) { unsigned long offset; - offset = swp_offset(get_swap_for_hibernation(swap)); + offset = swp_offset(get_swap_page_of_type(swap)); if (offset) { if (swsusp_extents_insert(offset)) - swap_free_for_hibernation(swp_entry(swap, offset)); + swap_free(swp_entry(swap, offset)); else return swapdev_block(swap, offset); } @@ -163,7 +163,7 @@ void free_all_swap_pages(int swap) ext = container_of(node, struct swsusp_extent, node); rb_erase(node, &swsusp_extents); for (offset = ext->start; offset <= ext->end; offset++) - swap_free_for_hibernation(swp_entry(swap, offset)); + swap_free(swp_entry(swap, offset)); kfree(ext); } diff --git a/kernel/sched.c b/kernel/sched.c index 09b574e..ed09d4f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1294,6 +1294,10 @@ static void resched_task(struct task_struct *p) static void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { } + +static void sched_avg_update(struct rq *rq) +{ +} #endif /* CONFIG_SMP */ #if BITS_PER_LONG == 32 @@ -3182,6 +3186,8 @@ static void update_cpu_load(struct rq *this_rq) this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i; } + + sched_avg_update(this_rq); } static void update_cpu_load_active(struct rq *this_rq) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index ab661eb..a171138 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -54,13 +54,13 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling * Minimal preemption granularity for CPU-bound tasks: * (default: 2 msec * (1 + ilog(ncpus)), units: nanoseconds) */ -unsigned int sysctl_sched_min_granularity = 2000000ULL; -unsigned int normalized_sysctl_sched_min_granularity = 2000000ULL; +unsigned int sysctl_sched_min_granularity = 750000ULL; +unsigned int normalized_sysctl_sched_min_granularity = 750000ULL; /* * is kept at sysctl_sched_latency / sysctl_sched_min_granularity */ -static unsigned int sched_nr_latency = 3; +static unsigned int sched_nr_latency = 8; /* * After fork, child runs first. 
If set to 0 (default) then @@ -1313,7 +1313,7 @@ static struct sched_group * find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu, int load_idx) { - struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups; + struct sched_group *idlest = NULL, *group = sd->groups; unsigned long min_load = ULONG_MAX, this_load = 0; int imbalance = 100 + (sd->imbalance_pct-100)/2; @@ -1348,7 +1348,6 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, if (local_group) { this_load = avg_load; - this = group; } else if (avg_load < min_load) { min_load = avg_load; idlest = group; @@ -2268,8 +2267,6 @@ unsigned long scale_rt_power(int cpu) struct rq *rq = cpu_rq(cpu); u64 total, available; - sched_avg_update(rq); - total = sched_avg_period() + (rq->clock - rq->age_stamp); available = total - rq->rt_avg; diff --git a/kernel/sys.c b/kernel/sys.c index e9ad444..7f5a0cd 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -931,6 +931,7 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid) pgid = pid; if (pgid < 0) return -EINVAL; + rcu_read_lock(); /* From this point forward we keep holding onto the tasklist lock * so that our parent does not change from under us. -DaveM @@ -984,6 +985,7 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid) out: /* All paths lead to here, thus we are safe. -DaveM */ write_unlock_irq(&tasklist_lock); + rcu_read_unlock(); return err; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ca38e8e..f88552c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1713,10 +1713,7 @@ static __init int sysctl_init(void) { sysctl_set_parent(NULL, root_table); #ifdef CONFIG_SYSCTL_SYSCALL_CHECK - { - int err; - err = sysctl_check_table(current->nsproxy, root_table); - } + sysctl_check_table(current->nsproxy, root_table); #endif return 0; } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0d88ce9..fa7ece6 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -381,12 +381,19 @@ static int function_stat_show(struct seq_file *m, void *v) { struct ftrace_profile *rec = v; char str[KSYM_SYMBOL_LEN]; + int ret = 0; #ifdef CONFIG_FUNCTION_GRAPH_TRACER - static DEFINE_MUTEX(mutex); static struct trace_seq s; unsigned long long avg; unsigned long long stddev; #endif + mutex_lock(&ftrace_profile_lock); + + /* we raced with function_profile_reset() */ + if (unlikely(rec->counter == 0)) { + ret = -EBUSY; + goto out; + } kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); seq_printf(m, " %-30.30s %10lu", str, rec->counter); @@ -408,7 +415,6 @@ static int function_stat_show(struct seq_file *m, void *v) do_div(stddev, (rec->counter - 1) * 1000); } - mutex_lock(&mutex); trace_seq_init(&s); trace_print_graph_duration(rec->time, &s); trace_seq_puts(&s, " "); @@ -416,11 +422,12 @@ static int function_stat_show(struct seq_file *m, void *v) trace_seq_puts(&s, " "); trace_print_graph_duration(stddev, &s); trace_print_seq(m, &s); - mutex_unlock(&mutex); #endif seq_putc(m, '\n'); +out: + mutex_unlock(&ftrace_profile_lock); - return 0; + return ret; } static void ftrace_profile_reset(struct ftrace_profile_stat *stat) @@ -1503,6 +1510,8 @@ static void *t_start(struct seq_file *m, loff_t *pos) if (*pos > 0) return t_hash_start(m, pos); iter->flags |= FTRACE_ITER_PRINTALL; + /* reset in case of seek/pread */ + iter->flags &= ~FTRACE_ITER_HASH; return iter; } @@ -2409,7 +2418,7 @@ static const struct file_operations ftrace_filter_fops = { .open = ftrace_filter_open, .read = seq_read, .write = ftrace_filter_write, - .llseek = ftrace_regex_lseek, + 
.llseek = no_llseek, .release = ftrace_filter_release, }; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 19cccc3..492197e 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2985,13 +2985,11 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) static void rb_advance_iter(struct ring_buffer_iter *iter) { - struct ring_buffer *buffer; struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; unsigned length; cpu_buffer = iter->cpu_buffer; - buffer = cpu_buffer->buffer; /* * Check if we are at the end of the buffer. diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 000e6e8..31cc4cb 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -91,6 +91,8 @@ int perf_trace_init(struct perf_event *p_event) tp_event->class && tp_event->class->reg && try_module_get(tp_event->mod)) { ret = perf_trace_event_init(tp_event, p_event); + if (ret) + module_put(tp_event->mod); break; } } @@ -146,6 +148,7 @@ void perf_trace_destroy(struct perf_event *p_event) } } out: + module_put(tp_event->mod); mutex_unlock(&event_mutex); } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 8b27c98..544301d 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -514,8 +514,8 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs); static int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs); -/* Check the name is good for event/group */ -static int check_event_name(const char *name) +/* Check the name is good for event/group/fields */ +static int is_good_name(const char *name) { if (!isalpha(*name) && *name != '_') return 0; @@ -557,7 +557,7 @@ static struct trace_probe *alloc_trace_probe(const char *group, else tp->rp.kp.pre_handler = kprobe_dispatcher; - if (!event || !check_event_name(event)) { + if (!event || !is_good_name(event)) { ret = -EINVAL; goto error; } @@ -567,7 +567,7 @@ static struct trace_probe *alloc_trace_probe(const char *group, if (!tp->call.name) goto error; - if (!group || !check_event_name(group)) { + if (!group || !is_good_name(group)) { ret = -EINVAL; goto error; } @@ -883,7 +883,7 @@ static int create_trace_probe(int argc, char **argv) int i, ret = 0; int is_return = 0, is_delete = 0; char *symbol = NULL, *event = NULL, *group = NULL; - char *arg, *tmp; + char *arg; unsigned long offset = 0; void *addr = NULL; char buf[MAX_EVENT_NAME_LEN]; @@ -992,26 +992,36 @@ static int create_trace_probe(int argc, char **argv) /* parse arguments */ ret = 0; for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { + /* Increment count for freeing args in error case */ + tp->nr_args++; + /* Parse argument name */ arg = strchr(argv[i], '='); - if (arg) + if (arg) { *arg++ = '\0'; - else + tp->args[i].name = kstrdup(argv[i], GFP_KERNEL); + } else { arg = argv[i]; + /* If argument name is omitted, set "argN" */ + snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1); + tp->args[i].name = kstrdup(buf, GFP_KERNEL); + } - tp->args[i].name = kstrdup(argv[i], GFP_KERNEL); if (!tp->args[i].name) { - pr_info("Failed to allocate argument%d name '%s'.\n", - i, argv[i]); + pr_info("Failed to allocate argument[%d] name.\n", i); ret = -ENOMEM; goto error; } - tmp = strchr(tp->args[i].name, ':'); - if (tmp) - *tmp = '_'; /* convert : to _ */ + + if (!is_good_name(tp->args[i].name)) { + pr_info("Invalid argument[%d] name: %s\n", + i, tp->args[i].name); + ret = -EINVAL; + goto error; + } if 
(conflict_field_name(tp->args[i].name, tp->args, i)) { - pr_info("Argument%d name '%s' conflicts with " + pr_info("Argument[%d] name '%s' conflicts with " "another field.\n", i, argv[i]); ret = -EINVAL; goto error; @@ -1020,12 +1030,9 @@ static int create_trace_probe(int argc, char **argv) /* Parse fetch argument */ ret = parse_probe_arg(arg, tp, &tp->args[i], is_return); if (ret) { - pr_info("Parse error at argument%d. (%d)\n", i, ret); - kfree(tp->args[i].name); + pr_info("Parse error at argument[%d]. (%d)\n", i, ret); goto error; } - - tp->nr_args++; } ret = register_trace_probe(tp); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 0d53c8e..7f9c3c5 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -122,7 +122,7 @@ static void __touch_watchdog(void) void touch_softlockup_watchdog(void) { - __get_cpu_var(watchdog_touch_ts) = 0; + __raw_get_cpu_var(watchdog_touch_ts) = 0; } EXPORT_SYMBOL(touch_softlockup_watchdog); @@ -142,7 +142,14 @@ void touch_all_softlockup_watchdogs(void) #ifdef CONFIG_HARDLOCKUP_DETECTOR void touch_nmi_watchdog(void) { - __get_cpu_var(watchdog_nmi_touch) = true; + if (watchdog_enabled) { + unsigned cpu; + + for_each_present_cpu(cpu) { + if (per_cpu(watchdog_nmi_touch, cpu) != true) + per_cpu(watchdog_nmi_touch, cpu) = true; + } + } touch_softlockup_watchdog(); } EXPORT_SYMBOL(touch_nmi_watchdog); @@ -433,6 +440,9 @@ static int watchdog_enable(int cpu) wake_up_process(p); } + /* if any cpu succeeds, watchdog is considered enabled for the system */ + watchdog_enabled = 1; + return 0; } @@ -455,9 +465,6 @@ static void watchdog_disable(int cpu) per_cpu(softlockup_watchdog, cpu) = NULL; kthread_stop(p); } - - /* if any cpu succeeds, watchdog is considered enabled for the system */ - watchdog_enabled = 1; } static void watchdog_enable_all_cpus(void) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 8bd600c..f77afd9 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1,19 +1,26 @@ /* - * linux/kernel/workqueue.c + * kernel/workqueue.c - generic async execution with shared worker pool * - * Generic mechanism for defining kernel helper threads for running - * arbitrary tasks in process context. + * Copyright (C) 2002 Ingo Molnar * - * Started by Ingo Molnar, Copyright (C) 2002 + * Derived from the taskqueue/keventd code by: + * David Woodhouse + * Andrew Morton + * Kai Petzke + * Theodore Ts'o * - * Derived from the taskqueue/keventd code by: + * Made to use alloc_percpu by Christoph Lameter. * - * David Woodhouse - * Andrew Morton - * Kai Petzke - * Theodore Ts'o + * Copyright (C) 2010 SUSE Linux Products GmbH + * Copyright (C) 2010 Tejun Heo * - * Made to use alloc_percpu by Christoph Lameter. + * This is the generic async execution mechanism. Work items are + * executed in process context. The worker pool is shared and + * automatically managed. There is one worker pool for each CPU and + * one extra for works which are better served by workers which are + * not bound to any specific CPU. + * + * Please read Documentation/workqueue.txt for details. */ #include @@ -90,7 +97,8 @@ enum { /* * Structure fields follow one of the following exclusion rules. * - * I: Set during initialization and read-only afterwards. + * I: Modifiable by initialization/destruction paths and read-only for + * everyone else. * * P: Preemption protected. Disabling preemption is enough and should * only be modified and accessed from the local cpu. 
@@ -198,7 +206,7 @@ typedef cpumask_var_t mayday_mask_t; cpumask_test_and_set_cpu((cpu), (mask)) #define mayday_clear_cpu(cpu, mask) cpumask_clear_cpu((cpu), (mask)) #define for_each_mayday_cpu(cpu, mask) for_each_cpu((cpu), (mask)) -#define alloc_mayday_mask(maskp, gfp) alloc_cpumask_var((maskp), (gfp)) +#define alloc_mayday_mask(maskp, gfp) zalloc_cpumask_var((maskp), (gfp)) #define free_mayday_mask(mask) free_cpumask_var((mask)) #else typedef unsigned long mayday_mask_t; @@ -943,10 +951,14 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, struct global_cwq *gcwq; struct cpu_workqueue_struct *cwq; struct list_head *worklist; + unsigned int work_flags; unsigned long flags; debug_work_activate(work); + if (WARN_ON_ONCE(wq->flags & WQ_DYING)) + return; + /* determine gcwq to use */ if (!(wq->flags & WQ_UNBOUND)) { struct global_cwq *last_gcwq; @@ -989,14 +1001,17 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, BUG_ON(!list_empty(&work->entry)); cwq->nr_in_flight[cwq->work_color]++; + work_flags = work_color_to_flags(cwq->work_color); if (likely(cwq->nr_active < cwq->max_active)) { cwq->nr_active++; worklist = gcwq_determine_ins_pos(gcwq, cwq); - } else + } else { + work_flags |= WORK_STRUCT_DELAYED; worklist = &cwq->delayed_works; + } - insert_work(cwq, work, worklist, work_color_to_flags(cwq->work_color)); + insert_work(cwq, work, worklist, work_flags); spin_unlock_irqrestore(&gcwq->lock, flags); } @@ -1215,6 +1230,7 @@ static void worker_leave_idle(struct worker *worker) * bound), %false if offline. */ static bool worker_maybe_bind_and_lock(struct worker *worker) +__acquires(&gcwq->lock) { struct global_cwq *gcwq = worker->gcwq; struct task_struct *task = worker->task; @@ -1488,6 +1504,8 @@ static void gcwq_mayday_timeout(unsigned long __gcwq) * otherwise. */ static bool maybe_create_worker(struct global_cwq *gcwq) +__releases(&gcwq->lock) +__acquires(&gcwq->lock) { if (!need_to_create_worker(gcwq)) return false; @@ -1662,6 +1680,7 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq); move_linked_works(work, pos, NULL); + __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work)); cwq->nr_active++; } @@ -1669,6 +1688,7 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight * @cwq: cwq of interest * @color: color of work which left the queue + * @delayed: for a delayed work * * A work either has completed or is removed from pending queue, * decrement nr_in_flight of its cwq and handle workqueue flushing. @@ -1676,19 +1696,22 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) * CONTEXT: * spin_lock_irq(gcwq->lock). */ -static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color) +static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color, + bool delayed) { /* ignore uncolored works */ if (color == WORK_NO_COLOR) return; cwq->nr_in_flight[color]--; - cwq->nr_active--; - if (!list_empty(&cwq->delayed_works)) { - /* one down, submit a delayed one */ - if (cwq->nr_active < cwq->max_active) - cwq_activate_first_delayed(cwq); + if (!delayed) { + cwq->nr_active--; + if (!list_empty(&cwq->delayed_works)) { + /* one down, submit a delayed one */ + if (cwq->nr_active < cwq->max_active) + cwq_activate_first_delayed(cwq); + } } /* is flush in progress and are we at the flushing tip? 
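To make the WQ_DYING test added to __queue_work() above concrete, a hedged sketch of the case it guards against; foo_wq and foo_fn are hypothetical:

    /* A self-requeueing work item can no longer keep a workqueue alive
     * past destroy_workqueue(), which sets WQ_DYING before its final
     * flush_workqueue(): */
    static void foo_fn(struct work_struct *work)
    {
            /* dropped with WARN_ON_ONCE() once WQ_DYING is set */
            queue_work(foo_wq, work);
    }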
*/ @@ -1725,6 +1748,8 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color) * spin_lock_irq(gcwq->lock) which is released and regrabbed. */ static void process_one_work(struct worker *worker, struct work_struct *work) +__releases(&gcwq->lock) +__acquires(&gcwq->lock) { struct cpu_workqueue_struct *cwq = get_work_cwq(work); struct global_cwq *gcwq = cwq->gcwq; @@ -1823,7 +1848,7 @@ static void process_one_work(struct worker *worker, struct work_struct *work) hlist_del_init(&worker->hentry); worker->current_work = NULL; worker->current_cwq = NULL; - cwq_dec_nr_in_flight(cwq, work_color); + cwq_dec_nr_in_flight(cwq, work_color, false); } /** @@ -2388,7 +2413,8 @@ static int try_to_grab_pending(struct work_struct *work) debug_work_deactivate(work); list_del_init(&work->entry); cwq_dec_nr_in_flight(get_work_cwq(work), - get_work_color(work)); + get_work_color(work), + *work_data_bits(work) & WORK_STRUCT_DELAYED); ret = 1; } } @@ -2791,7 +2817,6 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name, if (IS_ERR(rescuer->task)) goto err; - wq->rescuer = rescuer; rescuer->task->flags |= PF_THREAD_BOUND; wake_up_process(rescuer->task); } @@ -2833,6 +2858,7 @@ void destroy_workqueue(struct workqueue_struct *wq) { unsigned int cpu; + wq->flags |= WQ_DYING; flush_workqueue(wq); /* @@ -2857,6 +2883,7 @@ void destroy_workqueue(struct workqueue_struct *wq) if (wq->flags & WQ_RESCUER) { kthread_stop(wq->rescuer->task); free_mayday_mask(wq->mayday_mask); + kfree(wq->rescuer); } free_cwqs(wq); @@ -3239,6 +3266,8 @@ static int __cpuinit trustee_thread(void *__gcwq) * multiple times. To be used by cpu_callback. */ static void __cpuinit wait_trustee_state(struct global_cwq *gcwq, int state) +__releases(&gcwq->lock) +__acquires(&gcwq->lock) { if (!(gcwq->trustee_state == state || gcwq->trustee_state == TRUSTEE_DONE)) { @@ -3545,8 +3574,7 @@ static int __init init_workqueues(void) spin_lock_init(&gcwq->lock); INIT_LIST_HEAD(&gcwq->worklist); gcwq->cpu = cpu; - if (cpu == WORK_CPU_UNBOUND) - gcwq->flags |= GCWQ_DISASSOCIATED; + gcwq->flags |= GCWQ_DISASSOCIATED; INIT_LIST_HEAD(&gcwq->idle_list); for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) @@ -3570,6 +3598,8 @@ static int __init init_workqueues(void) struct global_cwq *gcwq = get_gcwq(cpu); struct worker *worker; + if (cpu != WORK_CPU_UNBOUND) + gcwq->flags &= ~GCWQ_DISASSOCIATED; worker = create_worker(gcwq, true); BUG_ON(!worker); spin_lock_irq(&gcwq->lock); diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore new file mode 100644 index 0000000..162beca --- /dev/null +++ b/lib/raid6/.gitignore @@ -0,0 +1,4 @@ +mktables +altivec*.c +int*.c +tables.c diff --git a/lib/scatterlist.c b/lib/scatterlist.c index a5ec428..4ceb05d 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -248,8 +248,18 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents, left -= sg_size; sg = alloc_fn(alloc_size, gfp_mask); - if (unlikely(!sg)) - return -ENOMEM; + if (unlikely(!sg)) { + /* + * Adjust entry count to reflect that the last + * entry of the previous table won't be used for + * linkage. Without this, sg_kfree() may get + * confused. 
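An illustration of the __sg_alloc_table() failure path fixed above (sizes are examples):

    /*
     * nents = 200, max_ents = 128: the first chunk allocates 128
     * entries but only 127 count toward orig_nents, the last being
     * reserved as a chain link. If the second allocation then fails,
     * bumping orig_nents from 127 to 128 lets sg_kfree() free the
     * first chunk with the size that was actually allocated.
     */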
+ */ + if (prv) + table->nents = ++table->orig_nents; + + return -ENOMEM; + } sg_init_table(sg, alloc_size); table->nents = table->orig_nents += sg_size; diff --git a/mm/Kconfig b/mm/Kconfig index f4e516e..f0fb912 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -189,7 +189,7 @@ config COMPACTION config MIGRATION bool "Page migration" def_bool y - depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE + depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION help Allows the migration of the physical location of pages of processes while the virtual addresses are not changed. This is useful in diff --git a/mm/backing-dev.c b/mm/backing-dev.c index eaa4a5b..65d4204 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -30,6 +30,7 @@ EXPORT_SYMBOL_GPL(default_backing_dev_info); struct backing_dev_info noop_backing_dev_info = { .name = "noop", + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, }; EXPORT_SYMBOL_GPL(noop_backing_dev_info); @@ -243,6 +244,7 @@ static int __init default_bdi_init(void) err = bdi_init(&default_backing_dev_info); if (!err) bdi_register(&default_backing_dev_info, NULL, "default"); + err = bdi_init(&noop_backing_dev_info); return err; } @@ -445,8 +447,8 @@ static int bdi_forker_thread(void *ptr) switch (action) { case FORK_THREAD: __set_current_state(TASK_RUNNING); - task = kthread_run(bdi_writeback_thread, &bdi->wb, "flush-%s", - dev_name(bdi->dev)); + task = kthread_create(bdi_writeback_thread, &bdi->wb, + "flush-%s", dev_name(bdi->dev)); if (IS_ERR(task)) { /* * If thread creation fails, force writeout of @@ -457,10 +459,13 @@ static int bdi_forker_thread(void *ptr) /* * The spinlock makes sure we do not lose * wake-ups when racing with 'bdi_queue_work()'. + * And as soon as the bdi thread is visible, we + * can start it. */ spin_lock_bh(&bdi->wb_lock); bdi->wb.task = task; spin_unlock_bh(&bdi->wb_lock); + wake_up_process(task); } break; diff --git a/mm/bounce.c b/mm/bounce.c index 13b6dad..1481de6 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -116,8 +116,8 @@ static void copy_to_high_bio_irq(struct bio *to, struct bio *from) */ vfrom = page_address(fromvec->bv_page) + tovec->bv_offset; - flush_dcache_page(tovec->bv_page); bounce_copy_vec(tovec, vfrom); + flush_dcache_page(tovec->bv_page); } } diff --git a/mm/compaction.c b/mm/compaction.c index 94cce51..4d709ee 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -214,15 +214,16 @@ static void acct_isolated(struct zone *zone, struct compact_control *cc) /* Similar to reclaim, but different enough that they don't share logic */ static bool too_many_isolated(struct zone *zone) { - - unsigned long inactive, isolated; + unsigned long active, inactive, isolated; inactive = zone_page_state(zone, NR_INACTIVE_FILE) + zone_page_state(zone, NR_INACTIVE_ANON); + active = zone_page_state(zone, NR_ACTIVE_FILE) + + zone_page_state(zone, NR_ACTIVE_ANON); isolated = zone_page_state(zone, NR_ISOLATED_FILE) + zone_page_state(zone, NR_ISOLATED_ANON); - return isolated > inactive; + return isolated > (inactive + active) / 2; } /* diff --git a/mm/ksm.c b/mm/ksm.c index e2ae004..b1873cf 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1504,8 +1504,6 @@ struct page *ksm_does_need_to_copy(struct page *page, { struct page *new_page; - unlock_page(page); /* any racers will COW it, not modify it */ - new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); if (new_page) { copy_user_highpage(new_page, page, address, vma); @@ -1521,7 +1519,6 @@ struct page *ksm_does_need_to_copy(struct page *page, add_page_to_unevictable_list(new_page); } - 
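A worked example for the relaxed too_many_isolated() test in the compaction hunk above (illustrative counts):

    /*
     * inactive = 300, active = 500, isolated = 350:
     *   old test: 350 > 300 is true              -> back off
     *   new test: 350 > (300 + 500) / 2 is false -> proceed
     * The inactive-only bound could stall compaction on zones whose
     * pages sit mostly on the active lists.
     */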
page_cache_release(page); return new_page; } diff --git a/mm/memory.c b/mm/memory.c index 6b2ab10..71b161b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2623,7 +2623,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned int flags, pte_t orig_pte) { spinlock_t *ptl; - struct page *page; + struct page *page, *swapcache = NULL; swp_entry_t entry; pte_t pte; struct mem_cgroup *ptr = NULL; @@ -2679,10 +2679,23 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, lock_page(page); delayacct_clear_flag(DELAYACCT_PF_SWAPIN); - page = ksm_might_need_to_copy(page, vma, address); - if (!page) { - ret = VM_FAULT_OOM; - goto out; + /* + * Make sure try_to_free_swap didn't release the swapcache + * from under us. The page pin isn't enough to prevent that. + */ + if (unlikely(!PageSwapCache(page))) + goto out_page; + + if (ksm_might_need_to_copy(page, vma, address)) { + swapcache = page; + page = ksm_does_need_to_copy(page, vma, address); + + if (unlikely(!page)) { + ret = VM_FAULT_OOM; + page = swapcache; + swapcache = NULL; + goto out_page; + } } if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) { @@ -2735,6 +2748,18 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) try_to_free_swap(page); unlock_page(page); + if (swapcache) { + /* + * Hold the lock to avoid the swap entry to be reused + * until we take the PT lock for the pte_same() check + * (to avoid false positives from pte_same). For + * further safety release the lock after the swap_free + * so that the swap count won't change under a + * parallel locked swapcache. + */ + unlock_page(swapcache); + page_cache_release(swapcache); + } if (flags & FAULT_FLAG_WRITE) { ret |= do_wp_page(mm, vma, address, page_table, pmd, ptl, pte); @@ -2756,6 +2781,10 @@ out_page: unlock_page(page); out_release: page_cache_release(page); + if (swapcache) { + unlock_page(swapcache); + page_cache_release(swapcache); + } return ret; } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index a4cfcdc..dd186c1 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -584,19 +584,19 @@ static inline int pageblock_free(struct page *page) /* Return the start of the next active pageblock after a given page */ static struct page *next_active_pageblock(struct page *page) { - int pageblocks_stride; - /* Ensure the starting page is pageblock-aligned */ BUG_ON(page_to_pfn(page) & (pageblock_nr_pages - 1)); - /* Move forward by at least 1 * pageblock_nr_pages */ - pageblocks_stride = 1; - /* If the entire pageblock is free, move to the end of free page */ - if (pageblock_free(page)) - pageblocks_stride += page_order(page) - pageblock_order; + if (pageblock_free(page)) { + int order; + /* be careful. we don't have locks, page_order can be changed.*/ + order = page_order(page); + if ((order < MAX_ORDER) && (order >= pageblock_order)) + return page + (1 << order); + } - return page + (pageblocks_stride * pageblock_nr_pages); + return page + pageblock_nr_pages; } /* Checks if this range of memory is likely to be hot-removable. */ diff --git a/mm/mlock.c b/mm/mlock.c index cbae7c5..b70919c 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -135,12 +135,6 @@ void munlock_vma_page(struct page *page) } } -/* Is the vma a continuation of the stack vma above it? 
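
The next_active_pageblock() rewrite above is a lock-free reader: page_order() can change underneath it, so the value is trusted only after a range check, and the scan otherwise falls back to a fixed one-pageblock stride. The pattern in isolation (constants and names are made up for this sketch):

    #include <stdio.h>

    #define MAX_ORDER       11
    #define PAGEBLOCK_ORDER 9

    /*
     * 'racy_order' comes from an unlocked read and may be stale or
     * torn, so it is used for the skip optimization only when it
     * passes a sanity check; anything else falls back to the fixed,
     * always-safe one-pageblock stride.
     */
    static unsigned long next_offset(unsigned long off, int racy_order)
    {
        if (racy_order >= PAGEBLOCK_ORDER && racy_order < MAX_ORDER)
            return off + (1UL << racy_order);       /* skip free buddy */
        return off + (1UL << PAGEBLOCK_ORDER);      /* safe stride */
    }

    int main(void)
    {
        printf("%lu\n", next_offset(0, 10));    /* plausible: big skip */
        printf("%lu\n", next_offset(0, 250));   /* garbage: clamped */
        return 0;
    }
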
*/ -static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr) -{ - return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN); -} - static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr) { return (vma->vm_flags & VM_GROWSDOWN) && diff --git a/mm/mmzone.c b/mm/mmzone.c index f5b7d17..e35bfb8 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -87,3 +87,24 @@ int memmap_valid_within(unsigned long pfn, return 1; } #endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ + +#ifdef CONFIG_SMP +/* Called when a more accurate view of NR_FREE_PAGES is needed */ +unsigned long zone_nr_free_pages(struct zone *zone) +{ + unsigned long nr_free_pages = zone_page_state(zone, NR_FREE_PAGES); + + /* + * While kswapd is awake, it is considered the zone is under some + * memory pressure. Under pressure, there is a risk that + * per-cpu-counter-drift will allow the min watermark to be breached + * potentially causing a live-lock. While kswapd is awake and + * free pages are low, get a better estimate for free pages + */ + if (nr_free_pages < zone->percpu_drift_mark && + !waitqueue_active(&zone->zone_pgdat->kswapd_wait)) + return zone_page_state_snapshot(zone, NR_FREE_PAGES); + + return nr_free_pages; +} +#endif /* CONFIG_SMP */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a9649f4..a8cfa9c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -588,13 +588,13 @@ static void free_pcppages_bulk(struct zone *zone, int count, { int migratetype = 0; int batch_free = 0; + int to_free = count; spin_lock(&zone->lock); zone->all_unreclaimable = 0; zone->pages_scanned = 0; - __mod_zone_page_state(zone, NR_FREE_PAGES, count); - while (count) { + while (to_free) { struct page *page; struct list_head *list; @@ -619,8 +619,9 @@ static void free_pcppages_bulk(struct zone *zone, int count, /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ __free_one_page(page, zone, 0, page_private(page)); trace_mm_page_pcpu_drain(page, 0, page_private(page)); - } while (--count && --batch_free && !list_empty(list)); + } while (--to_free && --batch_free && !list_empty(list)); } + __mod_zone_page_state(zone, NR_FREE_PAGES, count); spin_unlock(&zone->lock); } @@ -631,8 +632,8 @@ static void free_one_page(struct zone *zone, struct page *page, int order, zone->all_unreclaimable = 0; zone->pages_scanned = 0; - __mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order); __free_one_page(page, zone, order, migratetype); + __mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order); spin_unlock(&zone->lock); } @@ -1461,7 +1462,7 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark, { /* free_pages my go negative - that's OK */ long min = mark; - long free_pages = zone_page_state(z, NR_FREE_PAGES) - (1 << order) + 1; + long free_pages = zone_nr_free_pages(z) - (1 << order) + 1; int o; if (alloc_flags & ALLOC_HIGH) @@ -1846,6 +1847,7 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, struct page *page = NULL; struct reclaim_state reclaim_state; struct task_struct *p = current; + bool drained = false; cond_resched(); @@ -1864,14 +1866,25 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, cond_resched(); - if (order != 0) - drain_all_pages(); + if (unlikely(!(*did_some_progress))) + return NULL; - if (likely(*did_some_progress)) - page = get_page_from_freelist(gfp_mask, nodemask, order, +retry: + page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist, high_zoneidx, alloc_flags, preferred_zone, migratetype); + + /* + * If an allocation 
failed after direct reclaim, it could be because + * pages are pinned on the per-cpu lists. Drain them and try again + */ + if (!page && !drained) { + drain_all_pages(); + drained = true; + goto retry; + } + return page; } @@ -2423,7 +2436,7 @@ void show_free_areas(void) " all_unreclaimable? %s" "\n", zone->name, - K(zone_page_state(zone, NR_FREE_PAGES)), + K(zone_nr_free_pages(zone)), K(min_wmark_pages(zone)), K(low_wmark_pages(zone)), K(high_wmark_pages(zone)), diff --git a/mm/percpu.c b/mm/percpu.c index e61dc2c..58c572b 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -393,7 +393,9 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc) goto out_unlock; old_size = chunk->map_alloc * sizeof(chunk->map[0]); - memcpy(new, chunk->map, old_size); + old = chunk->map; + + memcpy(new, old, old_size); chunk->map_alloc = new_alloc; chunk->map = new; @@ -1162,7 +1164,7 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info( } /* - * Don't accept if wastage is over 25%. The + * Don't accept if wastage is over 1/3. The * greater-than comparison ensures upa==1 always * passes the following check. */ diff --git a/mm/percpu_up.c b/mm/percpu_up.c index c4351c7..db884fa 100644 --- a/mm/percpu_up.c +++ b/mm/percpu_up.c @@ -14,13 +14,13 @@ void __percpu *__alloc_percpu(size_t size, size_t align) * percpu sections on SMP for which this path isn't used. */ WARN_ON_ONCE(align > SMP_CACHE_BYTES); - return kzalloc(size, GFP_KERNEL); + return (void __percpu __force *)kzalloc(size, GFP_KERNEL); } EXPORT_SYMBOL_GPL(__alloc_percpu); void free_percpu(void __percpu *p) { - kfree(p); + kfree(this_cpu_ptr(p)); } EXPORT_SYMBOL_GPL(free_percpu); diff --git a/mm/swapfile.c b/mm/swapfile.c index 1f3f9c5..9fc7bac 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -47,8 +47,6 @@ long nr_swap_pages; long total_swap_pages; static int least_priority; -static bool swap_for_hibernation; - static const char Bad_file[] = "Bad swap file entry "; static const char Unused_file[] = "Unused swap file entry "; static const char Bad_offset[] = "Bad swap offset entry "; @@ -141,8 +139,7 @@ static int discard_swap(struct swap_info_struct *si) nr_blocks = ((sector_t)se->nr_pages - 1) << (PAGE_SHIFT - 9); if (nr_blocks) { err = blkdev_issue_discard(si->bdev, start_block, - nr_blocks, GFP_KERNEL, - BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); + nr_blocks, GFP_KERNEL, 0); if (err) return err; cond_resched(); @@ -153,8 +150,7 @@ static int discard_swap(struct swap_info_struct *si) nr_blocks = (sector_t)se->nr_pages << (PAGE_SHIFT - 9); err = blkdev_issue_discard(si->bdev, start_block, - nr_blocks, GFP_KERNEL, - BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); + nr_blocks, GFP_KERNEL, 0); if (err) break; @@ -193,8 +189,7 @@ static void discard_swap_cluster(struct swap_info_struct *si, start_block <<= PAGE_SHIFT - 9; nr_blocks <<= PAGE_SHIFT - 9; if (blkdev_issue_discard(si->bdev, start_block, - nr_blocks, GFP_NOIO, BLKDEV_IFL_WAIT | - BLKDEV_IFL_BARRIER)) + nr_blocks, GFP_NOIO, 0)) break; } @@ -320,10 +315,8 @@ checks: if (offset > si->highest_bit) scan_base = offset = si->lowest_bit; - /* reuse swap entry of cache-only swap if not hibernation. */ - if (vm_swap_full() - && usage == SWAP_HAS_CACHE - && si->swap_map[offset] == SWAP_HAS_CACHE) { + /* reuse swap entry of cache-only swap if not busy. 
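
__alloc_pages_direct_reclaim() above now retries the freelist once after drain_all_pages(), instead of draining up front for high-order requests only: a failure right after reclaim may simply mean the freed pages are still pinned on per-cpu lists. A self-contained model of the drain-once-then-retry shape (the two counters are stand-ins, not kernel state):

    #include <stdbool.h>
    #include <stdio.h>

    static int freelist = 0;    /* globally visible free pages  */
    static int per_cpu  = 3;    /* pages pinned on per-cpu lists */

    static bool alloc_page(void)
    {
        if (freelist > 0) {
            freelist--;
            return true;
        }
        return false;
    }

    static void drain_all_pages(void)  /* give per-cpu pages back */
    {
        freelist += per_cpu;
        per_cpu = 0;
    }

    static bool alloc_with_one_drain(void)
    {
        bool drained = false;

        for (;;) {
            if (alloc_page())
                return true;
            /*
             * The freelist looked empty, but pages may be pinned on
             * per-cpu lists: drain them once and retry before failing.
             */
            if (drained)
                return false;
            drain_all_pages();
            drained = true;
        }
    }

    int main(void)
    {
        printf("%s\n", alloc_with_one_drain() ? "allocated" : "failed");
        return 0;
    }
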
*/ + if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) { int swap_was_freed; spin_unlock(&swap_lock); swap_was_freed = __try_to_reclaim_swap(si, offset); @@ -453,8 +446,6 @@ swp_entry_t get_swap_page(void) spin_lock(&swap_lock); if (nr_swap_pages <= 0) goto noswap; - if (swap_for_hibernation) - goto noswap; nr_swap_pages--; for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) { @@ -487,6 +478,28 @@ noswap: return (swp_entry_t) {0}; } +/* The only caller of this function is now the suspend routine */ +swp_entry_t get_swap_page_of_type(int type) +{ + struct swap_info_struct *si; + pgoff_t offset; + + spin_lock(&swap_lock); + si = swap_info[type]; + if (si && (si->flags & SWP_WRITEOK)) { + nr_swap_pages--; + /* This is called for allocating swap entry, not cache */ + offset = scan_swap_map(si, 1); + if (offset) { + spin_unlock(&swap_lock); + return swp_entry(type, offset); + } + nr_swap_pages++; + } + spin_unlock(&swap_lock); + return (swp_entry_t) {0}; +} + static struct swap_info_struct *swap_info_get(swp_entry_t entry) { struct swap_info_struct *p; @@ -670,6 +683,24 @@ int try_to_free_swap(struct page *page) if (page_swapcount(page)) return 0; + /* + * Once hibernation has begun to create its image of memory, + * there's a danger that one of the calls to try_to_free_swap() + * - most probably a call from __try_to_reclaim_swap() while + * hibernation is allocating its own swap pages for the image, + * but conceivably even a call from memory reclaim - will free + * the swap from a page which has already been recorded in the + * image as a clean swapcache page, and then reuse its swap for + * another page of the image. On waking from hibernation, the + * original page might be freed under memory pressure, then + * later read back in from swap, now with the wrong data. + * + * Hibernation clears bits from gfp_allowed_mask to prevent + * memory reclaim from writing to disk, so check that here. + */ + if (!(gfp_allowed_mask & __GFP_IO)) + return 0; + delete_from_swap_cache(page); SetPageDirty(page); return 1; @@ -746,74 +777,6 @@ int mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep) #endif #ifdef CONFIG_HIBERNATION - -static pgoff_t hibernation_offset[MAX_SWAPFILES]; -/* - * Once hibernation starts to use swap, we freeze swap_map[]. Otherwise, - * saved swap_map[] image to the disk will be an incomplete because it's - * changing without synchronization with hibernation snap shot. - * At resume, we just make swap_for_hibernation=false. We can forget - * used maps easily. - */ -void hibernation_freeze_swap(void) -{ - int i; - - spin_lock(&swap_lock); - - printk(KERN_INFO "PM: Freeze Swap\n"); - swap_for_hibernation = true; - for (i = 0; i < MAX_SWAPFILES; i++) - hibernation_offset[i] = 1; - spin_unlock(&swap_lock); -} - -void hibernation_thaw_swap(void) -{ - spin_lock(&swap_lock); - if (swap_for_hibernation) { - printk(KERN_INFO "PM: Thaw Swap\n"); - swap_for_hibernation = false; - } - spin_unlock(&swap_lock); -} - -/* - * Because updateing swap_map[] can make not-saved-status-change, - * we use our own easy allocator. - * Please see kernel/power/swap.c, Used swaps are recorded into - * RB-tree. 
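
The new try_to_free_swap() bail-out above keys off gfp_allowed_mask rather than the deleted swap_for_hibernation flag: once hibernation clears __GFP_IO, swap entries must not be recycled. The gating idea reduced to a toy program (the bit names mimic the kernel's, everything else is invented):

    #include <stdbool.h>
    #include <stdio.h>

    #define GFP_IO  0x1u    /* "may start I/O" capability bit */
    #define GFP_FS  0x2u

    static unsigned int allowed_mask = GFP_IO | GFP_FS;

    static bool try_reclaim_entry(void)
    {
        /*
         * Once the snapshot phase clears GFP_IO, refuse to recycle
         * entries: recycling could hand an entry already recorded in
         * the image to a new user and corrupt data after resume.
         */
        if (!(allowed_mask & GFP_IO))
            return false;
        puts("entry reclaimed");
        return true;
    }

    int main(void)
    {
        try_reclaim_entry();        /* normal operation: succeeds */
        allowed_mask &= ~GFP_IO;    /* image writing in progress */
        try_reclaim_entry();        /* quietly refuses */
        return 0;
    }
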
- */ -swp_entry_t get_swap_for_hibernation(int type) -{ - pgoff_t off; - swp_entry_t val = {0}; - struct swap_info_struct *si; - - spin_lock(&swap_lock); - - si = swap_info[type]; - if (!si || !(si->flags & SWP_WRITEOK)) - goto done; - - for (off = hibernation_offset[type]; off < si->max; ++off) { - if (!si->swap_map[off]) - break; - } - if (off < si->max) { - val = swp_entry(type, off); - hibernation_offset[type] = off + 1; - } -done: - spin_unlock(&swap_lock); - return val; -} - -void swap_free_for_hibernation(swp_entry_t ent) -{ - /* Nothing to do */ -} - /* * Find the swap type that corresponds to given device (if any). * @@ -2084,7 +2047,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) p->flags |= SWP_SOLIDSTATE; p->cluster_next = 1 + (random32() % p->highest_bit); } - if (discard_swap(p) == 0) + if (discard_swap(p) == 0 && (swap_flags & SWAP_FLAG_DISCARD)) p->flags |= SWP_DISCARDABLE; } diff --git a/mm/vmstat.c b/mm/vmstat.c index f389168..355a9e6 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -138,11 +138,24 @@ static void refresh_zone_stat_thresholds(void) int threshold; for_each_populated_zone(zone) { + unsigned long max_drift, tolerate_drift; + threshold = calculate_threshold(zone); for_each_online_cpu(cpu) per_cpu_ptr(zone->pageset, cpu)->stat_threshold = threshold; + + /* + * Only set percpu_drift_mark if there is a danger that + * NR_FREE_PAGES reports the low watermark is ok when in fact + * the min watermark could be breached by an allocation + */ + tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone); + max_drift = num_online_cpus() * threshold; + if (max_drift > tolerate_drift) + zone->percpu_drift_mark = high_wmark_pages(zone) + + max_drift; } } @@ -813,7 +826,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, "\n scanned %lu" "\n spanned %lu" "\n present %lu", - zone_page_state(zone, NR_FREE_PAGES), + zone_nr_free_pages(zone), min_wmark_pages(zone), low_wmark_pages(zone), high_wmark_pages(zone), @@ -998,6 +1011,7 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb, switch (action) { case CPU_ONLINE: case CPU_ONLINE_FROZEN: + refresh_zone_stat_thresholds(); start_cpu_timer(cpu); node_set_state(cpu_to_node(cpu), N_CPU); break; diff --git a/net/9p/client.c b/net/9p/client.c index dc6f2f2..9eb7250 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -331,8 +331,10 @@ static void p9_tag_cleanup(struct p9_client *c) } } - if (c->tagpool) + if (c->tagpool) { + p9_idpool_put(0, c->tagpool); /* free reserved tag 0 */ p9_idpool_destroy(c->tagpool); + } /* free requests associated with tags */ for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) { @@ -944,6 +946,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, int16_t nwqids, count; err = 0; + wqids = NULL; clnt = oldfid->clnt; if (clone) { fid = p9_fid_create(clnt); @@ -994,9 +997,11 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, else fid->qid = oldfid->qid; + kfree(wqids); return fid; clunk_fid: + kfree(wqids); p9_client_clunk(fid); fid = NULL; diff --git a/net/Kconfig b/net/Kconfig index e330594..e926884 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -217,7 +217,7 @@ source "net/dns_resolver/Kconfig" config RPS boolean - depends on SMP && SYSFS + depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS default y menu "Network testing" diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 5ed00bd..137f232 100644 --- a/net/bridge/br_netfilter.c +++ 
b/net/bridge/br_netfilter.c @@ -761,9 +761,11 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb) { if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) && skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu && - !skb_is_gso(skb)) + !skb_is_gso(skb)) { + /* BUG: Should really parse the IP options here. */ + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); return ip_fragment(skb, br_dev_queue_push_xmit); - else + } else return br_dev_queue_push_xmit(skb); } #else diff --git a/net/core/dev.c b/net/core/dev.c index 3721fbb..660dd41 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2058,16 +2058,16 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) { int queue_index; - struct sock *sk = skb->sk; + const struct net_device_ops *ops = dev->netdev_ops; - queue_index = sk_tx_queue_get(sk); - if (queue_index < 0) { - const struct net_device_ops *ops = dev->netdev_ops; + if (ops->ndo_select_queue) { + queue_index = ops->ndo_select_queue(dev, skb); + queue_index = dev_cap_txqueue(dev, queue_index); + } else { + struct sock *sk = skb->sk; + queue_index = sk_tx_queue_get(sk); + if (queue_index < 0) { - if (ops->ndo_select_queue) { - queue_index = ops->ndo_select_queue(dev, skb); - queue_index = dev_cap_txqueue(dev, queue_index); - } else { queue_index = 0; if (dev->real_num_tx_queues > 1) queue_index = skb_tx_hash(dev, skb); @@ -4845,7 +4845,7 @@ static void rollback_registered_many(struct list_head *head) dev = list_first_entry(head, struct net_device, unreg_list); call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); - synchronize_net(); + rcu_barrier(); list_for_each_entry(dev, head, unreg_list) dev_put(dev); diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 9fbe7f7..6743146 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -232,7 +232,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, est->last_packets = bstats->packets; est->avpps = rate_est->pps<<10; - spin_lock(&est_tree_lock); + spin_lock_bh(&est_tree_lock); if (!elist[idx].timer.function) { INIT_LIST_HEAD(&elist[idx].list); setup_timer(&elist[idx].timer, est_timer, idx); @@ -243,7 +243,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, list_add_rcu(&est->list, &elist[idx].list); gen_add_node(est); - spin_unlock(&est_tree_lock); + spin_unlock_bh(&est_tree_lock); return 0; } @@ -270,7 +270,7 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, { struct gen_estimator *e; - spin_lock(&est_tree_lock); + spin_lock_bh(&est_tree_lock); while ((e = gen_find_node(bstats, rate_est))) { rb_erase(&e->node, &est_root); @@ -281,7 +281,7 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, list_del_rcu(&e->list); call_rcu(&e->e_rcu, __gen_kill_estimator); } - spin_unlock(&est_tree_lock); + spin_unlock_bh(&est_tree_lock); } EXPORT_SYMBOL(gen_kill_estimator); @@ -320,9 +320,9 @@ bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, ASSERT_RTNL(); - spin_lock(&est_tree_lock); + spin_lock_bh(&est_tree_lock); res = gen_find_node(bstats, rate_est) != NULL; - spin_unlock(&est_tree_lock); + spin_unlock_bh(&est_tree_lock); return res; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3a2513f..c83b421 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2573,6 +2573,10 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) __copy_skb_header(nskb, skb); nskb->mac_len = skb->mac_len; + /* nskb and skb might have different headroom */ + if (nskb->ip_summed == 
CHECKSUM_PARTIAL) + nskb->csum_start += skb_headroom(nskb) - headroom; + skb_reset_mac_header(nskb); skb_set_network_header(nskb, skb->mac_len); nskb->transport_header = (nskb->network_header + @@ -2703,7 +2707,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) return -E2BIG; headroom = skb_headroom(p); - nskb = netdev_alloc_skb(p->dev, headroom + skb_gro_offset(p)); + nskb = alloc_skb(headroom + skb_gro_offset(p), GFP_ATOMIC); if (unlikely(!nskb)) return -ENOMEM; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 7c3a7d1..571f895 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -46,7 +46,7 @@ config IP_ADVANCED_ROUTER rp_filter on use: echo 1 > /proc/sys/net/ipv4/conf//rp_filter - and + or echo 1 > /proc/sys/net/ipv4/conf/all/rp_filter Note that some distributions enable it in startup scripts. diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index f055094..721a8a3 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -62,8 +62,11 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) } if (!inet->inet_saddr) inet->inet_saddr = rt->rt_src; /* Update source address */ - if (!inet->inet_rcv_saddr) + if (!inet->inet_rcv_saddr) { inet->inet_rcv_saddr = rt->rt_src; + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); + } inet->inet_daddr = rt->rt_dst; inet->inet_dport = usin->sin_port; sk->sk_state = TCP_ESTABLISHED; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index a439689..7d02a9f 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -246,6 +246,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, struct fib_result res; int no_addr, rpf, accept_local; + bool dev_match; int ret; struct net *net; @@ -273,12 +274,22 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, } *spec_dst = FIB_RES_PREFSRC(res); fib_combine_itag(itag, &res); + dev_match = false; + #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1) + for (ret = 0; ret < res.fi->fib_nhs; ret++) { + struct fib_nh *nh = &res.fi->fib_nh[ret]; + + if (nh->nh_dev == dev) { + dev_match = true; + break; + } + } #else if (FIB_RES_DEV(res) == dev) + dev_match = true; #endif - { + if (dev_match) { ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; fib_res_put(&res); return ret; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 79d057a..4a8e370 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -186,7 +186,9 @@ static inline struct tnode *node_parent_rcu(struct node *node) { struct tnode *ret = node_parent(node); - return rcu_dereference(ret); + return rcu_dereference_check(ret, + rcu_read_lock_held() || + lockdep_rtnl_is_held()); } /* Same as rcu_assign_pointer @@ -1753,7 +1755,9 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c) static struct leaf *trie_firstleaf(struct trie *t) { - struct tnode *n = (struct tnode *) rcu_dereference(t->trie); + struct tnode *n = (struct tnode *) rcu_dereference_check(t->trie, + rcu_read_lock_held() || + lockdep_rtnl_is_held()); if (!n) return NULL; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a1ad0e7..1fdcacd 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -834,7 +834,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, int mark = 0; - if (len == 8) { + if (len == 8 || IGMP_V2_SEEN(in_dev)) { if (ih->code == 0) { /* Alas, old v1 router presents here. 
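
fib_validate_source() above no longer assumes that a multipath route (fib_nhs > 1) matches any device; it walks the next-hops and compares each against the ingress device explicitly. Roughly this shape, as a standalone sketch (the struct layout is invented for illustration):

    #include <stdbool.h>
    #include <stdio.h>

    struct nexthop { int dev_id; };

    struct route {
        int            nh_count;
        struct nexthop nh[4];
    };

    /* True only if one of the route's next-hops uses this device. */
    static bool route_uses_dev(const struct route *r, int dev_id)
    {
        for (int i = 0; i < r->nh_count; i++)
            if (r->nh[i].dev_id == dev_id)
                return true;
        return false;
    }

    int main(void)
    {
        struct route r = {
            .nh_count = 2,
            .nh = { { .dev_id = 3 }, { .dev_id = 7 } },
        };

        printf("dev 7: %s\n", route_uses_dev(&r, 7) ? "match" : "no match");
        printf("dev 9: %s\n", route_uses_dev(&r, 9) ? "match" : "no match");
        return 0;
    }
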
*/ diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 6c40a8c..64b70ad 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1129,6 +1129,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_HDRINCL: val = inet->hdrincl; break; + case IP_NODEFRAG: + val = inet->nodefrag; + break; case IP_MTU_DISCOVER: val = inet->pmtudisc; break; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3f56b6e..6298f75 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2738,6 +2738,11 @@ slow_output: } EXPORT_SYMBOL_GPL(__ip_route_output_key); +static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) +{ + return NULL; +} + static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) { } @@ -2746,7 +2751,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { .family = AF_INET, .protocol = cpu_to_be16(ETH_P_IP), .destroy = ipv4_dst_destroy, - .check = ipv4_dst_check, + .check = ipv4_blackhole_dst_check, .update_pmtu = ipv4_rt_blackhole_update_pmtu, .entries = ATOMIC_INIT(0), }; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 32e0bef..fb23c2e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1260,6 +1260,49 @@ void udp_lib_unhash(struct sock *sk) } EXPORT_SYMBOL(udp_lib_unhash); +/* + * inet_rcv_saddr was changed, we must rehash secondary hash + */ +void udp_lib_rehash(struct sock *sk, u16 newhash) +{ + if (sk_hashed(sk)) { + struct udp_table *udptable = sk->sk_prot->h.udp_table; + struct udp_hslot *hslot, *hslot2, *nhslot2; + + hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); + nhslot2 = udp_hashslot2(udptable, newhash); + udp_sk(sk)->udp_portaddr_hash = newhash; + if (hslot2 != nhslot2) { + hslot = udp_hashslot(udptable, sock_net(sk), + udp_sk(sk)->udp_port_hash); + /* we must lock primary chain too */ + spin_lock_bh(&hslot->lock); + + spin_lock(&hslot2->lock); + hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); + hslot2->count--; + spin_unlock(&hslot2->lock); + + spin_lock(&nhslot2->lock); + hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, + &nhslot2->head); + nhslot2->count++; + spin_unlock(&nhslot2->lock); + + spin_unlock_bh(&hslot->lock); + } + } +} +EXPORT_SYMBOL(udp_lib_rehash); + +static void udp_v4_rehash(struct sock *sk) +{ + u16 new_hash = udp4_portaddr_hash(sock_net(sk), + inet_sk(sk)->inet_rcv_saddr, + inet_sk(sk)->inet_num); + udp_lib_rehash(sk, new_hash); +} + static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int rc; @@ -1843,6 +1886,7 @@ struct proto udp_prot = { .backlog_rcv = __udp_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, + .rehash = udp_v4_rehash, .get_port = udp_v4_get_port, .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 7d929a2..ef371aa 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -105,9 +105,12 @@ ipv4_connected: if (ipv6_addr_any(&np->saddr)) ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - if (ipv6_addr_any(&np->rcv_saddr)) + if (ipv6_addr_any(&np->rcv_saddr)) { ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, &np->rcv_saddr); + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); + } goto out; } @@ -181,6 +184,8 @@ ipv4_connected: if (ipv6_addr_any(&np->rcv_saddr)) { ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src); inet->inet_rcv_saddr = LOOPBACK4_IPV6; + if (sk->sk_prot->rehash) + sk->sk_prot->rehash(sk); } ip6_dst_store(sk, dst, diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c 
b/net/ipv6/netfilter/nf_conntrack_reasm.c index 13ef5bc..578f3c1 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -113,14 +113,6 @@ static void nf_skb_free(struct sk_buff *skb) kfree_skb(NFCT_FRAG6_CB(skb)->orig); } -/* Memory Tracking Functions. */ -static void frag_kfree_skb(struct sk_buff *skb) -{ - atomic_sub(skb->truesize, &nf_init_frags.mem); - nf_skb_free(skb); - kfree_skb(skb); -} - /* Destruction primitives. */ static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) @@ -282,66 +274,22 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, } found: - /* We found where to put this one. Check for overlap with - * preceding fragment, and, if needed, align things so that - * any overlaps are eliminated. - */ - if (prev) { - int i = (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset; - - if (i > 0) { - offset += i; - if (end <= offset) { - pr_debug("overlap\n"); - goto err; - } - if (!pskb_pull(skb, i)) { - pr_debug("Can't pull\n"); - goto err; - } - if (skb->ip_summed != CHECKSUM_UNNECESSARY) - skb->ip_summed = CHECKSUM_NONE; - } - } - - /* Look for overlap with succeeding segments. - * If we can merge fragments, do it. + /* RFC5722, Section 4: + * When reassembling an IPv6 datagram, if + * one or more its constituent fragments is determined to be an + * overlapping fragment, the entire datagram (and any constituent + * fragments, including those not yet received) MUST be silently + * discarded. */ - while (next && NFCT_FRAG6_CB(next)->offset < end) { - /* overlap is 'i' bytes */ - int i = end - NFCT_FRAG6_CB(next)->offset; - - if (i < next->len) { - /* Eat head of the next overlapped fragment - * and leave the loop. The next ones cannot overlap. - */ - pr_debug("Eat head of the overlapped parts.: %d", i); - if (!pskb_pull(next, i)) - goto err; - /* next fragment */ - NFCT_FRAG6_CB(next)->offset += i; - fq->q.meat -= i; - if (next->ip_summed != CHECKSUM_UNNECESSARY) - next->ip_summed = CHECKSUM_NONE; - break; - } else { - struct sk_buff *free_it = next; - - /* Old fragmnet is completely overridden with - * new one drop it. - */ - next = next->next; + /* Check for overlap with preceding fragment. */ + if (prev && + (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset > 0) + goto discard_fq; - if (prev) - prev->next = next; - else - fq->q.fragments = next; - - fq->q.meat -= free_it->len; - frag_kfree_skb(free_it); - } - } + /* Look for overlap with succeeding segment. */ + if (next && NFCT_FRAG6_CB(next)->offset < end) + goto discard_fq; NFCT_FRAG6_CB(skb)->offset = offset; @@ -371,6 +319,8 @@ found: write_unlock(&nf_frags.lock); return 0; +discard_fq: + fq_kill(fq); err: return -1; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 545c414..64cfef1 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -149,13 +149,6 @@ int ip6_frag_match(struct inet_frag_queue *q, void *a) } EXPORT_SYMBOL(ip6_frag_match); -/* Memory Tracking Functions. */ -static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb) -{ - atomic_sub(skb->truesize, &nf->mem); - kfree_skb(skb); -} - void ip6_frag_init(struct inet_frag_queue *q, void *a) { struct frag_queue *fq = container_of(q, struct frag_queue, q); @@ -346,58 +339,22 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, } found: - /* We found where to put this one. Check for overlap with - * preceding fragment, and, if needed, align things so that - * any overlaps are eliminated. 
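
Both reassembly paths, the netfilter one above and the plain IPv6 one below, drop the old trim-and-merge logic in favour of RFC 5722: any overlap condemns the whole datagram. The two neighbor checks modeled standalone (prev and next are the fragments adjacent to the insertion point in the sorted list, as in the kernel code):

    #include <stdbool.h>
    #include <stdio.h>

    struct frag { int offset, len; };

    /*
     * New fragment covers [offset, end). Per RFC 5722, if it overlaps
     * either neighbor in the sorted fragment list, the caller must
     * drop the entire reassembly queue, not trim the fragment.
     */
    static bool overlaps(const struct frag *prev, const struct frag *next,
                         int offset, int end)
    {
        if (prev && prev->offset + prev->len > offset)
            return true;            /* tail of prev covers our head */
        if (next && next->offset < end)
            return true;            /* our tail covers head of next */
        return false;
    }

    int main(void)
    {
        struct frag prev = { 0, 16 }, next = { 32, 16 };

        printf("%d\n", overlaps(&prev, &next, 16, 32)); /* 0: fits exactly */
        printf("%d\n", overlaps(&prev, &next, 8, 24));  /* 1: hits prev */
        return 0;
    }
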
+ /* RFC5722, Section 4: + * When reassembling an IPv6 datagram, if + * one or more its constituent fragments is determined to be an + * overlapping fragment, the entire datagram (and any constituent + * fragments, including those not yet received) MUST be silently + * discarded. */ - if (prev) { - int i = (FRAG6_CB(prev)->offset + prev->len) - offset; - if (i > 0) { - offset += i; - if (end <= offset) - goto err; - if (!pskb_pull(skb, i)) - goto err; - if (skb->ip_summed != CHECKSUM_UNNECESSARY) - skb->ip_summed = CHECKSUM_NONE; - } - } + /* Check for overlap with preceding fragment. */ + if (prev && + (FRAG6_CB(prev)->offset + prev->len) - offset > 0) + goto discard_fq; - /* Look for overlap with succeeding segments. - * If we can merge fragments, do it. - */ - while (next && FRAG6_CB(next)->offset < end) { - int i = end - FRAG6_CB(next)->offset; /* overlap is 'i' bytes */ - - if (i < next->len) { - /* Eat head of the next overlapped fragment - * and leave the loop. The next ones cannot overlap. - */ - if (!pskb_pull(next, i)) - goto err; - FRAG6_CB(next)->offset += i; /* next fragment */ - fq->q.meat -= i; - if (next->ip_summed != CHECKSUM_UNNECESSARY) - next->ip_summed = CHECKSUM_NONE; - break; - } else { - struct sk_buff *free_it = next; - - /* Old fragment is completely overridden with - * new one drop it. - */ - next = next->next; - - if (prev) - prev->next = next; - else - fq->q.fragments = next; - - fq->q.meat -= free_it->len; - frag_kfree_skb(fq->q.net, free_it); - } - } + /* Look for overlap with succeeding segment. */ + if (next && FRAG6_CB(next)->offset < end) + goto discard_fq; FRAG6_CB(skb)->offset = offset; @@ -436,6 +393,8 @@ found: write_unlock(&ip6_frags.lock); return -1; +discard_fq: + fq_kill(fq); err: IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1dd1aff..5acb356 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -111,6 +111,15 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr); } +static void udp_v6_rehash(struct sock *sk) +{ + u16 new_hash = udp6_portaddr_hash(sock_net(sk), + &inet6_sk(sk)->rcv_saddr, + inet_sk(sk)->inet_num); + + udp_lib_rehash(sk, new_hash); +} + static inline int compute_score(struct sock *sk, struct net *net, unsigned short hnum, struct in6_addr *saddr, __be16 sport, @@ -1447,6 +1456,7 @@ struct proto udpv6_prot = { .backlog_rcv = udpv6_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, + .rehash = udp_v6_rehash, .get_port = udp_v6_get_port, .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 79986a6..fd55b51 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -824,8 +824,8 @@ static int irda_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = irda_open_tsap(self, addr->sir_lsap_sel, addr->sir_name); if (err < 0) { - kfree(self->ias_obj->name); - kfree(self->ias_obj); + irias_delete_object(self->ias_obj); + self->ias_obj = NULL; goto out; } diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index a788f9e..6130f9d 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -1102,7 +1102,7 @@ int irlan_extract_param(__u8 *buf, char *name, char *value, __u16 *len) memcpy(&val_len, buf+n, 2); /* To avoid alignment problems */ le16_to_cpus(&val_len); n+=2; - if (val_len > 1016) { + if (val_len >= 1016) { IRDA_DEBUG(2, "%s(), 
parameter length to long\n", __func__ ); return -RSP_INVALID_COMMAND_FORMAT; } diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 023ba82..5826129 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -1024,7 +1024,8 @@ static int llc_ui_setsockopt(struct socket *sock, int level, int optname, { struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); - int rc = -EINVAL, opt; + unsigned int opt; + int rc = -EINVAL; lock_sock(sk); if (unlikely(level != SOL_LLC || optlen != sizeof(int))) diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index e4dae02..cf4aea3 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -689,7 +689,7 @@ static void llc_station_rcv(struct sk_buff *skb) int __init llc_station_init(void) { - u16 rc = -ENOBUFS; + int rc = -ENOBUFS; struct sk_buff *skb; struct llc_station_state_ev *ev; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 798a91b..ded5c38 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -732,6 +732,12 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) rtnl_unlock(); + /* + * Now all work items will be gone, but the + * timer might still be armed, so delete it + */ + del_timer_sync(&local->work_timer); + cancel_work_sync(&local->reconfig_filter); ieee80211_clear_tx_pending(local); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 4f8ddba..4c2f89d 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -924,6 +924,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, ip_vs_out_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); + ip_vs_update_conntrack(skb, cp, 0); ip_vs_conn_put(cp); skb->ipvs_property = 1; diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index f228a17..7e9af5b 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -359,7 +360,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, buf_len = strlen(buf); ct = nf_ct_get(skb, &ctinfo); - if (ct && !nf_ct_is_untracked(ct)) { + if (ct && !nf_ct_is_untracked(ct) && nfct_nat(ct)) { /* If mangling fails this function will return 0 * which will cause the packet to be dropped. 
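
The af_llc change above turns opt into an unsigned int because the value copied from userspace is range-checked with '>' comparisons; with a signed int, a negative value slips past every upper-bound test. A compile-and-run demonstration (the limit is an arbitrary stand-in for the protocol maximum):

    #include <stdio.h>

    #define OPT_MAX 48   /* arbitrary stand-in for the protocol limit */

    static int check_signed(int opt)
    {
        return opt > OPT_MAX ? -1 : 0;
    }

    static int check_unsigned(unsigned int opt)
    {
        return opt > OPT_MAX ? -1 : 0;
    }

    int main(void)
    {
        int from_user = -1;     /* hostile setsockopt() payload */

        /*
         * -1 passes the signed test, but wraps to a huge value and
         * fails the unsigned one, which is the point of the change.
         */
        printf("signed:   %s\n",
               check_signed(from_user) ? "rejected" : "ACCEPTED");
        printf("unsigned: %s\n",
               check_unsigned((unsigned int)from_user) ? "rejected" : "ACCEPTED");
        return 0;
    }
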
* Mangling can only fail under memory pressure, @@ -409,7 +410,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, union nf_inet_addr to; __be16 port; struct ip_vs_conn *n_cp; - struct nf_conn *ct; #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, @@ -496,11 +496,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, ip_vs_control_add(n_cp, cp); } - ct = (struct nf_conn *)skb->nfct; - if (ct && ct != &nf_conntrack_untracked) - ip_vs_expect_related(skb, ct, n_cp, - IPPROTO_TCP, &n_cp->dport, 1); - /* * Move tunnel to listen state */ diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 21e1a5e..49df6be 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -349,8 +349,8 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } #endif -static void -ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp) +void +ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) { struct nf_conn *ct = (struct nf_conn *)skb->nfct; struct nf_conntrack_tuple new_tuple; @@ -365,11 +365,17 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp) * real-server we will see RIP->DIP. */ new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; - new_tuple.src.u3 = cp->daddr; + if (outin) + new_tuple.src.u3 = cp->daddr; + else + new_tuple.dst.u3 = cp->vaddr; /* * This will also take care of UDP and other protocols. */ - new_tuple.src.u.tcp.port = cp->dport; + if (outin) + new_tuple.src.u.tcp.port = cp->dport; + else + new_tuple.dst.u.tcp.port = cp->vport; nf_conntrack_alter_reply(ct, &new_tuple); } @@ -428,7 +434,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); - ip_vs_update_conntrack(skb, cp); + ip_vs_update_conntrack(skb, cp, 1); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still @@ -506,7 +512,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); - ip_vs_update_conntrack(skb, cp); + ip_vs_update_conntrack(skb, cp, 1); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 980fe4a..cd96ed3 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2102,6 +2102,26 @@ static void __net_exit netlink_net_exit(struct net *net) #endif } +static void __init netlink_add_usersock_entry(void) +{ + unsigned long *listeners; + int groups = 32; + + listeners = kzalloc(NLGRPSZ(groups) + sizeof(struct listeners_rcu_head), + GFP_KERNEL); + if (!listeners) + panic("netlink_add_usersock_entry: Cannot allocate listeners\n"); + + netlink_table_grab(); + + nl_table[NETLINK_USERSOCK].groups = groups; + nl_table[NETLINK_USERSOCK].listeners = listeners; + nl_table[NETLINK_USERSOCK].module = THIS_MODULE; + nl_table[NETLINK_USERSOCK].registered = 1; + + netlink_table_ungrab(); +} + static struct pernet_operations __net_initdata netlink_net_ops = { .init = netlink_net_init, .exit = netlink_net_exit, @@ -2150,6 +2170,8 @@ static int __init netlink_proto_init(void) hash->rehash_time = jiffies; } + netlink_add_usersock_entry(); + sock_register(&netlink_family_ops); register_pernet_subsys(&netlink_net_ops); /* The netlink device handler may be needed early. 
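
ip_vs_update_conntrack() above gains an outin flag so one helper can fix up the conntrack reply tuple for either traffic direction: one direction rewrites the reply source from the connection's daddr/dport, the other rewrites the reply destination from vaddr/vport. Stripped down to the dispatch alone (types and values are placeholders, not the kernel's structures):

    #include <stdio.h>

    struct endpoint { unsigned int addr; unsigned short port; };
    struct tuple    { struct endpoint src, dst; };

    /*
     * One helper, two directions: 'outin' selects whether the reply
     * tuple's source (daddr/dport) or its destination (vaddr/vport)
     * gets rewritten, mirroring the change above in miniature.
     */
    static void update_reply_tuple(struct tuple *reply, int outin,
                                   struct endpoint real, struct endpoint virt)
    {
        if (outin)
            reply->src = real;
        else
            reply->dst = virt;
    }

    int main(void)
    {
        struct tuple reply = { { 0x0a000001, 80 }, { 0xc0a80001, 8080 } };
        struct endpoint real = { 0x0a0000fe, 80 };
        struct endpoint virt = { 0xc0a800fe, 80 };

        update_reply_tuple(&reply, 1, real, virt);
        printf("reply src now %#x:%u\n", reply.src.addr,
               (unsigned int)reply.src.port);
        return 0;
    }
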
*/ diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 537a487..7ebf743 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -350,22 +350,19 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_police *police = a->priv; - struct tc_police opt; - - opt.index = police->tcf_index; - opt.action = police->tcf_action; - opt.mtu = police->tcfp_mtu; - opt.burst = police->tcfp_burst; - opt.refcnt = police->tcf_refcnt - ref; - opt.bindcnt = police->tcf_bindcnt - bind; + struct tc_police opt = { + .index = police->tcf_index, + .action = police->tcf_action, + .mtu = police->tcfp_mtu, + .burst = police->tcfp_burst, + .refcnt = police->tcf_refcnt - ref, + .bindcnt = police->tcf_bindcnt - bind, + }; + if (police->tcfp_R_tab) opt.rate = police->tcfp_R_tab->rate; - else - memset(&opt.rate, 0, sizeof(opt.rate)); if (police->tcfp_P_tab) opt.peakrate = police->tcfp_P_tab->rate; - else - memset(&opt.peakrate, 0, sizeof(opt.peakrate)); NLA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); if (police->tcfp_result) NLA_PUT_U32(skb, TCA_POLICE_RESULT, police->tcfp_result); diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 3406627..6318e11 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -255,10 +255,6 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, error = -EINVAL; goto err_out; } - if (!list_empty(&flow->list)) { - error = -EEXIST; - goto err_out; - } } else { int i; unsigned long cl; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index abd904b..4749609 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -761,8 +761,8 @@ init_vf(struct hfsc_class *cl, unsigned int len) if (f != cl->cl_f) { cl->cl_f = f; cftree_update(cl); - update_cfmin(cl->cl_parent); } + update_cfmin(cl->cl_parent); } } diff --git a/net/sctp/output.c b/net/sctp/output.c index a646681..bcc4590 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -92,7 +92,6 @@ struct sctp_packet *sctp_packet_config(struct sctp_packet *packet, SCTP_DEBUG_PRINTK("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag); - sctp_packet_reset(packet); packet->vtag = vtag; if (ecn_capable && sctp_packet_empty(packet)) { diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 24b2cd5..d344dc4 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1232,6 +1232,18 @@ out: return 0; } +static bool list_has_sctp_addr(const struct list_head *list, + union sctp_addr *ipaddr) +{ + struct sctp_transport *addr; + + list_for_each_entry(addr, list, transports) { + if (sctp_cmp_addr_exact(ipaddr, &addr->ipaddr)) + return true; + } + + return false; +} /* A restart is occurring, check to make sure no new addresses * are being added as we may be under a takeover attack. */ @@ -1240,10 +1252,10 @@ static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc, struct sctp_chunk *init, sctp_cmd_seq_t *commands) { - struct sctp_transport *new_addr, *addr; - int found; + struct sctp_transport *new_addr; + int ret = 1; - /* Implementor's Guide - Sectin 5.2.2 + /* Implementor's Guide - Section 5.2.2 * ... * Before responding the endpoint MUST check to see if the * unexpected INIT adds new addresses to the association. If new @@ -1254,31 +1266,19 @@ static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc, /* Search through all current addresses and make sure * we aren't adding any new ones. 
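
sctp_sf_check_restart_addrs() is reworked around the new list_has_sctp_addr() helper above: the nested found-flag loops collapse into a single pass (continued just below) that aborts on the first address not already known to the association. The same structure over plain arrays, as a userspace stand-in for the transport lists:

    #include <stdbool.h>
    #include <stdio.h>

    static bool list_has_addr(const unsigned int *list, int n, unsigned int addr)
    {
        for (int i = 0; i < n; i++)
            if (list[i] == addr)
                return true;
        return false;
    }

    /* Return 1 if every incoming address is already known, else 0 (ABORT). */
    static int check_restart_addrs(const unsigned int *cur, int ncur,
                                   const unsigned int *incoming, int nin)
    {
        for (int i = 0; i < nin; i++) {
            if (!list_has_addr(cur, ncur, incoming[i])) {
                printf("unknown addr %#x: send ABORT\n", incoming[i]);
                return 0;
            }
        }
        return 1;
    }

    int main(void)
    {
        unsigned int cur[] = { 0x0a000001, 0x0a000002 };
        unsigned int ok[]  = { 0x0a000002 };
        unsigned int bad[] = { 0x0a000003 };

        printf("%d\n", check_restart_addrs(cur, 2, ok, 1));   /* 1 */
        printf("%d\n", check_restart_addrs(cur, 2, bad, 1));  /* 0 */
        return 0;
    }
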
*/ - new_addr = NULL; - found = 0; - list_for_each_entry(new_addr, &new_asoc->peer.transport_addr_list, - transports) { - found = 0; - list_for_each_entry(addr, &asoc->peer.transport_addr_list, - transports) { - if (sctp_cmp_addr_exact(&new_addr->ipaddr, - &addr->ipaddr)) { - found = 1; - break; - } - } - if (!found) + transports) { + if (!list_has_sctp_addr(&asoc->peer.transport_addr_list, + &new_addr->ipaddr)) { + sctp_sf_send_restart_abort(&new_addr->ipaddr, init, + commands); + ret = 0; break; - } - - /* If a new address was added, ABORT the sender. */ - if (!found && new_addr) { - sctp_sf_send_restart_abort(&new_addr->ipaddr, init, commands); + } } /* Return success if all addresses were found. */ - return found; + return ret; } /* Populate the verification/tie tags based on overlapping INIT diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 36cb660..e9eaaf7 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -38,7 +38,7 @@ static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = { static LIST_HEAD(cred_unused); static unsigned long number_cred_unused; -#define MAX_HASHTABLE_BITS (10) +#define MAX_HASHTABLE_BITS (14) static int param_set_hashtbl_sz(const char *val, const struct kernel_param *kp) { unsigned long num; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index dcfc66b..12c4859 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -745,17 +745,18 @@ gss_pipe_release(struct inode *inode) struct rpc_inode *rpci = RPC_I(inode); struct gss_upcall_msg *gss_msg; +restart: spin_lock(&inode->i_lock); - while (!list_empty(&rpci->in_downcall)) { + list_for_each_entry(gss_msg, &rpci->in_downcall, list) { - gss_msg = list_entry(rpci->in_downcall.next, - struct gss_upcall_msg, list); + if (!list_empty(&gss_msg->msg.list)) + continue; gss_msg->msg.errno = -EPIPE; atomic_inc(&gss_msg->count); __gss_unhash_msg(gss_msg); spin_unlock(&inode->i_lock); gss_release_msg(gss_msg); - spin_lock(&inode->i_lock); + goto restart; } spin_unlock(&inode->i_lock); diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 0326446..778e5df 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -237,6 +237,7 @@ get_key(const void *p, const void *end, if (!supported_gss_krb5_enctype(alg)) { printk(KERN_WARNING "gss_kerberos_mech: unsupported " "encryption key algorithm %d\n", alg); + p = ERR_PTR(-EINVAL); goto out_err; } p = simple_get_netobj(p, end, &key); @@ -282,15 +283,19 @@ gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx) ctx->enctype = ENCTYPE_DES_CBC_RAW; ctx->gk5e = get_gss_krb5_enctype(ctx->enctype); - if (ctx->gk5e == NULL) + if (ctx->gk5e == NULL) { + p = ERR_PTR(-EINVAL); goto out_err; + } /* The downcall format was designed before we completely understood * the uses of the context fields; so it includes some stuff we * just give some minimal sanity-checking, and some we ignore * completely (like the next twenty bytes): */ - if (unlikely(p + 20 > end || p + 20 < p)) + if (unlikely(p + 20 > end || p + 20 < p)) { + p = ERR_PTR(-EFAULT); goto out_err; + } p += 20; p = simple_get_bytes(p, end, &tmp, sizeof(tmp)); if (IS_ERR(p)) @@ -619,6 +624,7 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx, if (ctx->seq_send64 != ctx->seq_send) { dprintk("%s: seq_send64 %lx, seq_send %x overflow?\n", __func__, (long unsigned)ctx->seq_send64, ctx->seq_send); + p = ERR_PTR(-EINVAL); goto out_err; } p = 
simple_get_bytes(p, end, &ctx->enctype, sizeof(ctx->enctype)); diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index dc3f1f5..adade3d 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -100,6 +100,7 @@ gss_import_sec_context_spkm3(const void *p, size_t len, if (version != 1) { dprintk("RPC: unknown spkm3 token format: " "obsolete nfs-utils?\n"); + p = ERR_PTR(-EINVAL); goto out_err_free_ctx; } @@ -135,8 +136,10 @@ gss_import_sec_context_spkm3(const void *p, size_t len, if (IS_ERR(p)) goto out_err_free_intg_alg; - if (p != end) + if (p != end) { + p = ERR_PTR(-EFAULT); goto out_err_free_intg_key; + } ctx_id->internal_ctx_id = ctx; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 2388d83..fa55490 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -226,7 +226,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru goto out_no_principal; } - kref_init(&clnt->cl_kref); + atomic_set(&clnt->cl_count, 1); err = rpc_setup_pipedir(clnt, program->pipe_dir_name); if (err < 0) @@ -390,14 +390,14 @@ rpc_clone_client(struct rpc_clnt *clnt) if (new->cl_principal == NULL) goto out_no_principal; } - kref_init(&new->cl_kref); + atomic_set(&new->cl_count, 1); err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name); if (err != 0) goto out_no_path; if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); xprt_get(clnt->cl_xprt); - kref_get(&clnt->cl_kref); + atomic_inc(&clnt->cl_count); rpc_register_client(new); rpciod_up(); return new; @@ -465,10 +465,8 @@ EXPORT_SYMBOL_GPL(rpc_shutdown_client); * Free an RPC client */ static void -rpc_free_client(struct kref *kref) +rpc_free_client(struct rpc_clnt *clnt) { - struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref); - dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); if (!IS_ERR(clnt->cl_path.dentry)) { @@ -495,12 +493,10 @@ out_free: * Free an RPC client */ static void -rpc_free_auth(struct kref *kref) +rpc_free_auth(struct rpc_clnt *clnt) { - struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref); - if (clnt->cl_auth == NULL) { - rpc_free_client(kref); + rpc_free_client(clnt); return; } @@ -509,10 +505,11 @@ rpc_free_auth(struct kref *kref) * release remaining GSS contexts. This mechanism ensures * that it can do so safely. */ - kref_init(kref); + atomic_inc(&clnt->cl_count); rpcauth_release(clnt->cl_auth); clnt->cl_auth = NULL; - kref_put(kref, rpc_free_client); + if (atomic_dec_and_test(&clnt->cl_count)) + rpc_free_client(clnt); } /* @@ -525,7 +522,8 @@ rpc_release_client(struct rpc_clnt *clnt) if (list_empty(&clnt->cl_tasks)) wake_up(&destroy_wait); - kref_put(&clnt->cl_kref, rpc_free_auth); + if (atomic_dec_and_test(&clnt->cl_count)) + rpc_free_auth(clnt); } /** @@ -588,7 +586,7 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) if (clnt != NULL) { rpc_task_release_client(task); task->tk_client = clnt; - kref_get(&clnt->cl_kref); + atomic_inc(&clnt->cl_count); if (clnt->cl_softrtry) task->tk_flags |= RPC_TASK_SOFT; /* Add to the client's list of all tasks */ @@ -931,7 +929,7 @@ call_reserveresult(struct rpc_task *task) task->tk_status = 0; if (status >= 0) { if (task->tk_rqstp) { - task->tk_action = call_allocate; + task->tk_action = call_refresh; return; } @@ -966,13 +964,54 @@ call_reserveresult(struct rpc_task *task) } /* - * 2. Allocate the buffer. For details, see sched.c:rpc_malloc. + * 2. 
Bind and/or refresh the credentials + */ +static void +call_refresh(struct rpc_task *task) +{ + dprint_status(task); + + task->tk_action = call_refreshresult; + task->tk_status = 0; + task->tk_client->cl_stats->rpcauthrefresh++; + rpcauth_refreshcred(task); +} + +/* + * 2a. Process the results of a credential refresh + */ +static void +call_refreshresult(struct rpc_task *task) +{ + int status = task->tk_status; + + dprint_status(task); + + task->tk_status = 0; + task->tk_action = call_allocate; + if (status >= 0 && rpcauth_uptodatecred(task)) + return; + switch (status) { + case -EACCES: + rpc_exit(task, -EACCES); + return; + case -ENOMEM: + rpc_exit(task, -ENOMEM); + return; + case -ETIMEDOUT: + rpc_delay(task, 3*HZ); + } + task->tk_action = call_refresh; +} + +/* + * 2b. Allocate the buffer. For details, see sched.c:rpc_malloc. * (Note: buffer memory is freed in xprt_release). */ static void call_allocate(struct rpc_task *task) { - unsigned int slack = task->tk_client->cl_auth->au_cslack; + unsigned int slack = task->tk_rqstp->rq_cred->cr_auth->au_cslack; struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = task->tk_xprt; struct rpc_procinfo *proc = task->tk_msg.rpc_proc; @@ -980,7 +1019,7 @@ call_allocate(struct rpc_task *task) dprint_status(task); task->tk_status = 0; - task->tk_action = call_refresh; + task->tk_action = call_bind; if (req->rq_buffer) return; @@ -1017,47 +1056,6 @@ call_allocate(struct rpc_task *task) rpc_exit(task, -ERESTARTSYS); } -/* - * 2a. Bind and/or refresh the credentials - */ -static void -call_refresh(struct rpc_task *task) -{ - dprint_status(task); - - task->tk_action = call_refreshresult; - task->tk_status = 0; - task->tk_client->cl_stats->rpcauthrefresh++; - rpcauth_refreshcred(task); -} - -/* - * 2b. Process the results of a credential refresh - */ -static void -call_refreshresult(struct rpc_task *task) -{ - int status = task->tk_status; - - dprint_status(task); - - task->tk_status = 0; - task->tk_action = call_bind; - if (status >= 0 && rpcauth_uptodatecred(task)) - return; - switch (status) { - case -EACCES: - rpc_exit(task, -EACCES); - return; - case -ENOMEM: - rpc_exit(task, -ENOMEM); - return; - case -ETIMEDOUT: - rpc_delay(task, 3*HZ); - } - task->tk_action = call_refresh; -} - static inline int rpc_task_need_encode(struct rpc_task *task) { diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 95ccbcf..8c8eef2 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -48,7 +48,7 @@ static void rpc_purge_list(struct rpc_inode *rpci, struct list_head *head, return; do { msg = list_entry(head->next, struct rpc_pipe_msg, list); - list_del(&msg->list); + list_del_init(&msg->list); msg->errno = err; destroy_msg(msg); } while (!list_empty(head)); @@ -208,7 +208,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp) if (msg != NULL) { spin_lock(&inode->i_lock); msg->errno = -EAGAIN; - list_del(&msg->list); + list_del_init(&msg->list); spin_unlock(&inode->i_lock); rpci->ops->destroy_msg(msg); } @@ -268,7 +268,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) if (res < 0 || msg->len == msg->copied) { filp->private_data = NULL; spin_lock(&inode->i_lock); - list_del(&msg->list); + list_del_init(&msg->list); spin_unlock(&inode->i_lock); rpci->ops->destroy_msg(msg); } @@ -371,21 +371,23 @@ rpc_show_info(struct seq_file *m, void *v) static int rpc_info_open(struct inode *inode, struct file *file) { - struct rpc_clnt *clnt; + struct rpc_clnt *clnt = NULL; int ret = single_open(file, 
rpc_show_info, NULL); if (!ret) { struct seq_file *m = file->private_data; - mutex_lock(&inode->i_mutex); - clnt = RPC_I(inode)->private; - if (clnt) { - kref_get(&clnt->cl_kref); + + spin_lock(&file->f_path.dentry->d_lock); + if (!d_unhashed(file->f_path.dentry)) + clnt = RPC_I(inode)->private; + if (clnt != NULL && atomic_inc_not_zero(&clnt->cl_count)) { + spin_unlock(&file->f_path.dentry->d_lock); m->private = clnt; } else { + spin_unlock(&file->f_path.dentry->d_lock); single_release(inode, file); ret = -EINVAL; } - mutex_unlock(&inode->i_mutex); } return ret; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 4414a18..0b39b24 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -692,6 +692,7 @@ static int unix_autobind(struct socket *sock) static u32 ordernum = 1; struct unix_address *addr; int err; + unsigned int retries = 0; mutex_lock(&u->readlock); @@ -717,9 +718,17 @@ retry: if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type, addr->hash)) { spin_unlock(&unix_table_lock); - /* Sanity yield. It is unusual case, but yet... */ - if (!(ordernum&0xFF)) - yield(); + /* + * __unix_find_socket_byname() may take long time if many names + * are already in use. + */ + cond_resched(); + /* Give up if all names seems to be in use. */ + if (retries++ == 0xFFFFF) { + err = -ENOSPC; + kfree(addr); + goto out; + } goto retry; } addr->hash ^= sk->sk_type; diff --git a/net/wireless/core.c b/net/wireless/core.c index 541e2ff..d6d046b 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -475,12 +475,10 @@ int wiphy_register(struct wiphy *wiphy) mutex_lock(&cfg80211_mutex); res = device_add(&rdev->wiphy.dev); - if (res) - goto out_unlock; - - res = rfkill_register(rdev->rfkill); - if (res) - goto out_rm_dev; + if (res) { + mutex_unlock(&cfg80211_mutex); + return res; + } /* set up regulatory info */ wiphy_update_regulatory(wiphy, NL80211_REGDOM_SET_BY_CORE); @@ -509,13 +507,18 @@ int wiphy_register(struct wiphy *wiphy) cfg80211_debugfs_rdev_add(rdev); mutex_unlock(&cfg80211_mutex); + /* + * due to a locking dependency this has to be outside of the + * cfg80211_mutex lock + */ + res = rfkill_register(rdev->rfkill); + if (res) + goto out_rm_dev; + return 0; out_rm_dev: device_del(&rdev->wiphy.dev); - -out_unlock: - mutex_unlock(&cfg80211_mutex); return res; } EXPORT_SYMBOL(wiphy_register); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index bb5e0a5..7e5c3a4 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1420,6 +1420,9 @@ int cfg80211_wext_giwessid(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; + data->flags = 0; + data->length = 0; + switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: return cfg80211_ibss_wext_giwessid(dev, info, data, ssid); diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 0ef17bc..8f5116f 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -782,6 +782,22 @@ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd, } } + if (IW_IS_GET(cmd) && !(descr->flags & IW_DESCR_FLAG_NOMAX)) { + /* + * If this is a GET, but not NOMAX, it means that the extra + * data is not bounded by userspace, but by max_tokens. Thus + * set the length to max_tokens. This matches the extra data + * allocation. + * The driver should fill it with the number of tokens it + * provided, and it may check iwp->length rather than having + * knowledge of max_tokens. 
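
The SUNRPC conversion above from struct kref to a bare atomic count is what lets rpc_info_open() use atomic_inc_not_zero(): take a reference only if the object has not already begun dying. With C11 atomics the primitive is a small compare-exchange loop (a sketch only; the kernel version also carries memory-ordering guarantees omitted here):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct obj { atomic_int count; };

    /* Take a reference only if at least one is still held. */
    static bool get_not_zero(struct obj *o)
    {
        int c = atomic_load(&o->count);

        while (c != 0) {
            /* on failure, c is reloaded with the current value */
            if (atomic_compare_exchange_weak(&o->count, &c, c + 1))
                return true;
        }
        return false;   /* already zero: the object is being torn down */
    }

    static void put_ref(struct obj *o)
    {
        if (atomic_fetch_sub(&o->count, 1) == 1)
            puts("last reference gone: safe to free");
    }

    int main(void)
    {
        struct obj o = { .count = 1 };

        printf("got ref: %d\n", get_not_zero(&o));  /* 1: count 1 -> 2 */
        put_ref(&o);                                /* 2 -> 1 */
        put_ref(&o);                                /* 1 -> 0: "freed" */
        printf("got ref: %d\n", get_not_zero(&o));  /* 0: too late */
        return 0;
    }
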
If the driver doesn't change the + * iwp->length, this ioctl just copies back max_tokens tokens + * filled with zeroes. Hopefully the driver isn't claiming + * them to be valid data. + */ + iwp->length = descr->max_tokens; + } + err = handler(dev, info, (union iwreq_data *) iwp, extra); iwp->length += essid_compat; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index a3cca0a..64f2ae1 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -101,7 +101,7 @@ resume: err = -EHOSTUNREACH; goto error_nolock; } - skb_dst_set_noref(skb, dst); + skb_dst_set(skb, dst_clone(dst)); x = dst->xfrm; } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b14ed4b..8bae6b2 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1801,7 +1801,7 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_user_expire *ue = nlmsg_data(nlh); struct xfrm_usersa_info *p = &ue->state; struct xfrm_mark m; - u32 mark = xfrm_mark_get(attrs, &m);; + u32 mark = xfrm_mark_get(attrs, &m); x = xfrm_state_lookup(net, mark, &p->id.daddr, p->id.spi, p->id.proto, p->family); diff --git a/scripts/basic/docproc.c b/scripts/basic/docproc.c index 79ab973..fc3b18d 100644 --- a/scripts/basic/docproc.c +++ b/scripts/basic/docproc.c @@ -34,12 +34,14 @@ * */ +#define _GNU_SOURCE #include <stdio.h> #include <stdlib.h> #include <string.h> #include <ctype.h> #include <unistd.h> #include <limits.h> +#include <errno.h> #include <sys/types.h> #include <sys/wait.h> @@ -54,6 +56,7 @@ typedef void FILEONLY(char * file); FILEONLY *internalfunctions; FILEONLY *externalfunctions; FILEONLY *symbolsonly; +FILEONLY *findall; typedef void FILELINE(char * file, char * line); FILELINE * singlefunctions; @@ -65,12 +68,30 @@ FILELINE * docsection; #define KERNELDOCPATH "scripts/" #define KERNELDOC "kernel-doc" #define DOCBOOK "-docbook" +#define LIST "-list" #define FUNCTION "-function" #define NOFUNCTION "-nofunction" #define NODOCSECTIONS "-no-doc-sections" static char *srctree, *kernsrctree; +static char **all_list = NULL; +static int all_list_len = 0; + +static void consume_symbol(const char *sym) +{ + int i; + + for (i = 0; i < all_list_len; i++) { + if (!all_list[i]) + continue; + if (strcmp(sym, all_list[i])) + continue; + all_list[i] = NULL; + break; + } +} + static void usage (void) { fprintf(stderr, "Usage: docproc {doc|depend} file\n"); @@ -248,6 +269,7 @@ static void docfunctions(char * filename, char * type) struct symfile * sym = &symfilelist[i]; for (j=0; j < sym->symbolcnt; j++) { vec[idx++] = type; + consume_symbol(sym->symbollist[j].name); vec[idx++] = sym->symbollist[j].name; } } @@ -287,6 +309,11 @@ static void singfunc(char * filename, char * line) vec[idx++] = &line[i]; } } + for (i = 0; i < idx; i++) { + if (strcmp(vec[i], FUNCTION)) + continue; + consume_symbol(vec[i + 1]); + } vec[idx++] = filename; vec[idx] = NULL; exec_kernel_doc(vec); @@ -306,6 +333,10 @@ static void docsect(char *filename, char *line) if (*s == '\n') *s = '\0'; + asprintf(&s, "DOC: %s", line); + consume_symbol(s); + free(s); + vec[0] = KERNELDOC; vec[1] = DOCBOOK; vec[2] = FUNCTION; @@ -315,6 +346,84 @@ static void docsect(char *filename, char *line) exec_kernel_doc(vec); } +static void find_all_symbols(char *filename) +{ + char *vec[4]; /* kerneldoc -list file NULL */ + pid_t pid; + int ret, i, count, start; + char real_filename[PATH_MAX + 1]; + int pipefd[2]; + char *data, *str; + size_t data_len = 0; + + vec[0] = KERNELDOC; + vec[1] = LIST; + vec[2] = filename; + vec[3] = NULL; + + if (pipe(pipefd)) { + perror("pipe"); +
exit(1); + } + + switch (pid = fork()) { + case -1: + perror("fork"); + exit(1); + case 0: + close(pipefd[0]); + dup2(pipefd[1], 1); + memset(real_filename, 0, sizeof(real_filename)); + strncat(real_filename, kernsrctree, PATH_MAX); + strncat(real_filename, "/" KERNELDOCPATH KERNELDOC, + PATH_MAX - strlen(real_filename)); + execvp(real_filename, vec); + fprintf(stderr, "exec "); + perror(real_filename); + exit(1); + default: + close(pipefd[1]); + data = malloc(4096); + do { + while ((ret = read(pipefd[0], + data + data_len, + 4096)) > 0) { + data_len += ret; + data = realloc(data, data_len + 4096); + } + } while (ret == -1 && errno == EAGAIN); + if (ret != 0) { + perror("read"); + exit(1); + } + waitpid(pid, &ret, 0); + } + if (WIFEXITED(ret)) + exitstatus |= WEXITSTATUS(ret); + else + exitstatus = 0xff; + + count = 0; + /* poor man's strtok, but with counting */ + for (i = 0; i < data_len; i++) { + if (data[i] == '\n') { + count++; + data[i] = '\0'; + } + } + start = all_list_len; + all_list_len += count; + all_list = realloc(all_list, sizeof(char *) * all_list_len); + str = data; + for (i = 0; i < data_len && start != all_list_len; i++) { + if (data[i] == '\0') { + all_list[start] = str; + str = data + i + 1; + start++; + } + } +} + /* * Parse file, calling action specific functions for: * 1) Lines containing !E @@ -322,7 +431,8 @@ static void docsect(char *filename, char *line) * 3) Lines containing !D * 4) Lines containing !F * 5) Lines containing !P - * 6) Default lines - lines not matching the above + * 6) Lines containing !C + * 7) Default lines - lines not matching the above */ static void parse_file(FILE *infile) { @@ -365,6 +475,12 @@ static void parse_file(FILE *infile) s++; docsection(line + 2, s); break; + case 'C': + while (*s && !isspace(*s)) s++; + *s = '\0'; + if (findall) + findall(line+2); + break; default: defaultline(line); } @@ -380,6 +496,7 @@ static void parse_file(FILE *infile) int main(int argc, char *argv[]) { FILE * infile; + int i; srctree = getenv("SRCTREE"); if (!srctree) @@ -415,6 +532,7 @@ int main(int argc, char *argv[]) symbolsonly = find_export_symbols; singlefunctions = noaction2; docsection = noaction2; + findall = find_all_symbols; parse_file(infile); /* Rewind to start from beginning of file again */ @@ -425,8 +543,16 @@ int main(int argc, char *argv[]) symbolsonly = printline; singlefunctions = singfunc; docsection = docsect; + findall = NULL; parse_file(infile); + + for (i = 0; i < all_list_len; i++) { + if (!all_list[i]) + continue; + fprintf(stderr, "Warning: didn't use docs for %s\n", + all_list[i]); + } } else if (strcmp("depend", argv[1]) == 0) { @@ -439,6 +565,7 @@ int main(int argc, char *argv[]) symbolsonly = adddep; singlefunctions = adddep2; docsection = adddep2; + findall = adddep; parse_file(infile); printf("\n"); } diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 102e123..cdb6dc1 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -44,12 +44,13 @@ use strict; # Note: This only supports 'c'. # usage: -# kernel-doc [ -docbook | -html | -text | -man ] [ -no-doc-sections ] +# kernel-doc [ -docbook | -html | -text | -man | -list ] [ -no-doc-sections ] # [ -function funcname [ -function funcname ...] ] c file(s)s > outputfile # or # [ -nofunction funcname [ -function funcname ...] ] c file(s)s > outputfile # # Set output format using one of -docbook -html -text or -man. Default is man. +# The -list format is for internal use by docproc.
# # -no-doc-sections # Do not output DOC: sections @@ -210,9 +211,16 @@ my %highlights_text = ( $type_constant, "\$1", $type_param, "\$1" ); my $blankline_text = ""; +# list mode +my %highlights_list = ( $type_constant, "\$1", + $type_func, "\$1", + $type_struct, "\$1", + $type_param, "\$1" ); +my $blankline_list = ""; sub usage { - print "Usage: $0 [ -v ] [ -docbook | -html | -text | -man ] [ -no-doc-sections ]\n"; + print "Usage: $0 [ -v ] [ -docbook | -html | -text | -man | -list ]\n"; + print " [ -no-doc-sections ]\n"; print " [ -function funcname [ -function funcname ...] ]\n"; print " [ -nofunction funcname [ -nofunction funcname ...] ]\n"; print " c source file(s) > outputfile\n"; @@ -318,6 +326,10 @@ while ($ARGV[0] =~ m/^-(.*)/) { $output_mode = "xml"; %highlights = %highlights_xml; $blankline = $blankline_xml; + } elsif ($cmd eq "-list") { + $output_mode = "list"; + %highlights = %highlights_list; + $blankline = $blankline_list; } elsif ($cmd eq "-gnome") { $output_mode = "gnome"; %highlights = %highlights_gnome; @@ -1361,6 +1373,42 @@ sub output_blockhead_text(%) { } } +## list mode output functions + +sub output_function_list(%) { + my %args = %{$_[0]}; + + print $args{'function'} . "\n"; +} + +# output enum in list +sub output_enum_list(%) { + my %args = %{$_[0]}; + print $args{'enum'} . "\n"; +} + +# output typedef in list +sub output_typedef_list(%) { + my %args = %{$_[0]}; + print $args{'typedef'} . "\n"; +} + +# output struct as list +sub output_struct_list(%) { + my %args = %{$_[0]}; + + print $args{'struct'} . "\n"; +} + +sub output_blockhead_list(%) { + my %args = %{$_[0]}; + my ($parameter, $section); + + foreach $section (@{$args{'sectionlist'}}) { + print "DOC: $section\n"; + } +} + ## # generic output function for all types (function, struct/union, typedef, enum); # calls the generated, variable output_ function name based on @@ -1679,7 +1727,7 @@ sub check_sections($$$$$$) { foreach $px (0 .. 
$#prms) { $prm_clean = $prms[$px]; $prm_clean =~ s/\[.*\]//; - $prm_clean =~ s/__attribute__\s*\(\([a-z,_\*\s\(\)]*\)\)//; + $prm_clean =~ s/__attribute__\s*\(\([a-z,_\*\s\(\)]*\)\)//i; # ignore array size in a parameter string; # however, the original param string may contain # spaces, e.g.: addr[6 + 2] diff --git a/security/apparmor/include/resource.h b/security/apparmor/include/resource.h index 3c88be9..02baec7 100644 --- a/security/apparmor/include/resource.h +++ b/security/apparmor/include/resource.h @@ -33,8 +33,8 @@ struct aa_rlimit { }; int aa_map_resource(int resource); -int aa_task_setrlimit(struct aa_profile *profile, unsigned int resource, - struct rlimit *new_rlim); +int aa_task_setrlimit(struct aa_profile *profile, struct task_struct *, + unsigned int resource, struct rlimit *new_rlim); void __aa_transition_rlimits(struct aa_profile *old, struct aa_profile *new); diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 6e85cdb..506d2ba 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -40,6 +40,7 @@ char *aa_split_fqname(char *fqname, char **ns_name) *ns_name = NULL; if (name[0] == ':') { char *split = strchr(&name[1], ':'); + *ns_name = skip_spaces(&name[1]); if (split) { /* overwrite ':' with \0 */ *split = 0; @@ -47,7 +48,6 @@ char *aa_split_fqname(char *fqname, char **ns_name) } else /* a ns name without a following profile is allowed */ name = NULL; - *ns_name = &name[1]; } if (name && *name == 0) name = NULL; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index f73e2c2..cf1de44 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -614,7 +614,7 @@ static int apparmor_task_setrlimit(struct task_struct *task, int error = 0; if (!unconfined(profile)) - error = aa_task_setrlimit(profile, resource, new_rlim); + error = aa_task_setrlimit(profile, task, resource, new_rlim); return error; } diff --git a/security/apparmor/path.c b/security/apparmor/path.c index 19358dc..8239605 100644 --- a/security/apparmor/path.c +++ b/security/apparmor/path.c @@ -59,8 +59,7 @@ static int d_namespace_path(struct path *path, char *buf, int buflen, { struct path root, tmp; char *res; - int deleted, connected; - int error = 0; + int connected, error = 0; /* Get the root we want to resolve too, released below */ if (flags & PATH_CHROOT_REL) { @@ -74,19 +73,8 @@ static int d_namespace_path(struct path *path, char *buf, int buflen, } spin_lock(&dcache_lock); - /* There is a race window between path lookup here and the - * need to strip the " (deleted) string that __d_path applies - * Detect the race and relookup the path - * - * The stripping of (deleted) is a hack that could be removed - * with an updated __d_path - */ - do { - tmp = root; - deleted = d_unlinked(path->dentry); - res = __d_path(path, &tmp, buf, buflen); - - } while (deleted != d_unlinked(path->dentry)); + tmp = root; + res = __d_path(path, &tmp, buf, buflen); spin_unlock(&dcache_lock); *name = res; @@ -98,21 +86,17 @@ static int d_namespace_path(struct path *path, char *buf, int buflen, *name = buf; goto out; } - if (deleted) { - /* On some filesystems, newly allocated dentries appear to the - * security_path hooks as a deleted dentry except without an - * inode allocated. - * - * Remove the appended deleted text and return as string for - * normal mediation, or auditing. The (deleted) string is - * guaranteed to be added in this case, so just strip it. 
- */ - buf[buflen - 11] = 0; /* - (len(" (deleted)") +\0) */ - if (path->dentry->d_inode && !(flags & PATH_MEDIATE_DELETED)) { + /* Handle two cases: + * 1. A deleted dentry && the profile does not allow mediation of deleted + * 2. On some filesystems, newly allocated dentries appear to the + * security_path hooks as a deleted dentry except without an inode + * allocated. + */ + if (d_unlinked(path->dentry) && path->dentry->d_inode && + !(flags & PATH_MEDIATE_DELETED)) { error = -ENOENT; goto out; - } } /* Determine if the path is connected to the expected root */ diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 3cdc1ad..52cc865 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -1151,12 +1151,14 @@ ssize_t aa_remove_profiles(char *fqname, size_t size) /* released below */ ns = aa_get_namespace(root); - write_lock(&ns->lock); if (!name) { /* remove namespace - can only happen if fqname[0] == ':' */ + write_lock(&ns->parent->lock); __remove_namespace(ns); + write_unlock(&ns->parent->lock); } else { /* remove profile */ + write_lock(&ns->lock); profile = aa_get_profile(__lookup_profile(&ns->base, name)); if (!profile) { error = -ENOENT; @@ -1165,8 +1167,8 @@ ssize_t aa_remove_profiles(char *fqname, size_t size) } name = profile->base.hname; __remove_profile(profile); + write_unlock(&ns->lock); } - write_unlock(&ns->lock); /* don't fail removal if audit fails */ (void) audit_policy(OP_PROF_RM, GFP_KERNEL, name, info, error); diff --git a/security/apparmor/resource.c b/security/apparmor/resource.c index 4a368f1..a4136c1 100644 --- a/security/apparmor/resource.c +++ b/security/apparmor/resource.c @@ -72,6 +72,7 @@ int aa_map_resource(int resource) /** * aa_task_setrlimit - test permission to set an rlimit * @profile - profile confining the task (NOT NULL) + * @task - task the resource is being set on * @resource - the resource being set * @new_rlim - the new resource limit (NOT NULL) * @@ -79,18 +80,21 @@ int aa_map_resource(int resource) * * Returns: 0 or error code if setting resource failed */ -int aa_task_setrlimit(struct aa_profile *profile, unsigned int resource, - struct rlimit *new_rlim) +int aa_task_setrlimit(struct aa_profile *profile, struct task_struct *task, + unsigned int resource, struct rlimit *new_rlim) { int error = 0; - if (profile->rlimits.mask & (1 << resource) && - new_rlim->rlim_max > profile->rlimits.limits[resource].rlim_max) - - error = audit_resource(profile, resource, new_rlim->rlim_max, - -EACCES); + /* TODO: extend resource control to handle other (non-current) + * processes.
AppArmor rules currently have the implicit assumption + * that the task is setting the resource of the current process + */ + if ((task != current->group_leader) || + (profile->rlimits.mask & (1 << resource) && + new_rlim->rlim_max > profile->rlimits.limits[resource].rlim_max)) + error = -EACCES; - return error; + return audit_resource(profile, resource, new_rlim->rlim_max, error); } /** diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index 16d100d..3fbcd1d 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -35,6 +35,7 @@ enum tpm_pcrs { TPM_PCR0 = 0, TPM_PCR8 = 8 }; #define IMA_MEASURE_HTABLE_SIZE (1 << IMA_HASH_BITS) /* set during initialization */ +extern int iint_initialized; extern int ima_initialized; extern int ima_used_chip; extern char *ima_hash; diff --git a/security/integrity/ima/ima_iint.c b/security/integrity/ima/ima_iint.c index 7625b85..afba4ae 100644 --- a/security/integrity/ima/ima_iint.c +++ b/security/integrity/ima/ima_iint.c @@ -22,9 +22,10 @@ RADIX_TREE(ima_iint_store, GFP_ATOMIC); DEFINE_SPINLOCK(ima_iint_lock); - static struct kmem_cache *iint_cache __read_mostly; +int iint_initialized = 0; + /* ima_iint_find_get - return the iint associated with an inode * * ima_iint_find_get gets a reference to the iint. Caller must @@ -141,6 +142,7 @@ static int __init ima_iintcache_init(void) iint_cache = kmem_cache_create("iint_cache", sizeof(struct ima_iint_cache), 0, SLAB_PANIC, init_once); + iint_initialized = 1; return 0; } security_initcall(ima_iintcache_init); diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index f936413..e662b89 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -148,12 +148,14 @@ void ima_counts_get(struct file *file) struct ima_iint_cache *iint; int rc; - if (!ima_initialized || !S_ISREG(inode->i_mode)) + if (!iint_initialized || !S_ISREG(inode->i_mode)) return; iint = ima_iint_find_get(inode); if (!iint) return; mutex_lock(&iint->mutex); + if (!ima_initialized) + goto out; rc = ima_must_measure(iint, inode, MAY_READ, FILE_CHECK); if (rc < 0) goto out; @@ -213,7 +215,7 @@ void ima_file_free(struct file *file) struct inode *inode = file->f_dentry->d_inode; struct ima_iint_cache *iint; - if (!ima_initialized || !S_ISREG(inode->i_mode)) + if (!iint_initialized || !S_ISREG(inode->i_mode)) return; iint = ima_iint_find_get(inode); if (!iint) @@ -230,7 +232,7 @@ static int process_measurement(struct file *file, const unsigned char *filename, { struct inode *inode = file->f_dentry->d_inode; struct ima_iint_cache *iint; - int rc; + int rc = 0; if (!ima_initialized || !S_ISREG(inode->i_mode)) return 0; diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index b2b0998..60924f6 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1272,6 +1272,7 @@ long keyctl_session_to_parent(void) keyring_r = NULL; me = current; + rcu_read_lock(); write_lock_irq(&tasklist_lock); parent = me->real_parent; @@ -1304,7 +1305,8 @@ long keyctl_session_to_parent(void) goto not_permitted; /* the keyrings must have the same UID */ - if (pcred->tgcred->session_keyring->uid != mycred->euid || + if ((pcred->tgcred->session_keyring && + pcred->tgcred->session_keyring->uid != mycred->euid) || mycred->tgcred->session_keyring->uid != mycred->euid) goto not_permitted; @@ -1319,6 +1321,7 @@ long keyctl_session_to_parent(void) set_ti_thread_flag(task_thread_info(parent), TIF_NOTIFY_RESUME); write_unlock_irq(&tasklist_lock); + rcu_read_unlock(); if 
(oldcred) put_cred(oldcred); return 0; @@ -1327,6 +1330,7 @@ already_same: ret = 0; not_permitted: write_unlock_irq(&tasklist_lock); + rcu_read_unlock(); put_cred(cred); return ret; diff --git a/sound/core/pcm.c b/sound/core/pcm.c index 204af48..ac242a3 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -372,14 +372,17 @@ static void snd_pcm_substream_proc_hw_params_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm_substream *substream = entry->private_data; - struct snd_pcm_runtime *runtime = substream->runtime; + struct snd_pcm_runtime *runtime; + + mutex_lock(&substream->pcm->open_mutex); + runtime = substream->runtime; if (!runtime) { snd_iprintf(buffer, "closed\n"); - return; + goto unlock; } if (runtime->status->state == SNDRV_PCM_STATE_OPEN) { snd_iprintf(buffer, "no setup\n"); - return; + goto unlock; } snd_iprintf(buffer, "access: %s\n", snd_pcm_access_name(runtime->access)); snd_iprintf(buffer, "format: %s\n", snd_pcm_format_name(runtime->format)); @@ -398,20 +401,25 @@ static void snd_pcm_substream_proc_hw_params_read(struct snd_info_entry *entry, snd_iprintf(buffer, "OSS period frames: %lu\n", (unsigned long)runtime->oss.period_frames); } #endif + unlock: + mutex_unlock(&substream->pcm->open_mutex); } static void snd_pcm_substream_proc_sw_params_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm_substream *substream = entry->private_data; - struct snd_pcm_runtime *runtime = substream->runtime; + struct snd_pcm_runtime *runtime; + + mutex_lock(&substream->pcm->open_mutex); + runtime = substream->runtime; if (!runtime) { snd_iprintf(buffer, "closed\n"); - return; + goto unlock; } if (runtime->status->state == SNDRV_PCM_STATE_OPEN) { snd_iprintf(buffer, "no setup\n"); - return; + goto unlock; } snd_iprintf(buffer, "tstamp_mode: %s\n", snd_pcm_tstamp_mode_name(runtime->tstamp_mode)); snd_iprintf(buffer, "period_step: %u\n", runtime->period_step); @@ -421,24 +429,29 @@ static void snd_pcm_substream_proc_sw_params_read(struct snd_info_entry *entry, snd_iprintf(buffer, "silence_threshold: %lu\n", runtime->silence_threshold); snd_iprintf(buffer, "silence_size: %lu\n", runtime->silence_size); snd_iprintf(buffer, "boundary: %lu\n", runtime->boundary); + unlock: + mutex_unlock(&substream->pcm->open_mutex); } static void snd_pcm_substream_proc_status_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { struct snd_pcm_substream *substream = entry->private_data; - struct snd_pcm_runtime *runtime = substream->runtime; + struct snd_pcm_runtime *runtime; struct snd_pcm_status status; int err; + + mutex_lock(&substream->pcm->open_mutex); + runtime = substream->runtime; if (!runtime) { snd_iprintf(buffer, "closed\n"); - return; + goto unlock; } memset(&status, 0, sizeof(status)); err = snd_pcm_status(substream, &status); if (err < 0) { snd_iprintf(buffer, "error %d\n", err); - return; + goto unlock; } snd_iprintf(buffer, "state: %s\n", snd_pcm_state_name(status.state)); snd_iprintf(buffer, "owner_pid : %d\n", pid_vnr(substream->pid)); @@ -452,6 +465,8 @@ static void snd_pcm_substream_proc_status_read(struct snd_info_entry *entry, snd_iprintf(buffer, "-----\n"); snd_iprintf(buffer, "hw_ptr : %ld\n", runtime->status->hw_ptr); snd_iprintf(buffer, "appl_ptr : %ld\n", runtime->control->appl_ptr); + unlock: + mutex_unlock(&substream->pcm->open_mutex); } #ifdef CONFIG_SND_PCM_XRUN_DEBUG diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 134fc6c..d4eb2ef 100644 --- a/sound/core/pcm_native.c +++ 
b/sound/core/pcm_native.c @@ -1992,6 +1992,8 @@ void snd_pcm_release_substream(struct snd_pcm_substream *substream) substream->ops->close(substream); substream->hw_opened = 0; } + if (pm_qos_request_active(&substream->latency_pm_qos_req)) + pm_qos_remove_request(&substream->latency_pm_qos_req); if (substream->pcm_release) { substream->pcm_release(substream); substream->pcm_release = NULL; diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index eb68326..a7868ad 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -829,6 +829,8 @@ static int snd_rawmidi_control_ioctl(struct snd_card *card, if (get_user(device, (int __user *)argp)) return -EFAULT; + if (device >= SNDRV_RAWMIDI_DEVICES) /* next device is -1 */ + device = SNDRV_RAWMIDI_DEVICES - 1; mutex_lock(®ister_mutex); device = device < 0 ? 0 : device + 1; while (device < SNDRV_RAWMIDI_DEVICES) { diff --git a/sound/core/seq/oss/seq_oss_init.c b/sound/core/seq/oss/seq_oss_init.c index 6857122..69cd7b3 100644 --- a/sound/core/seq/oss/seq_oss_init.c +++ b/sound/core/seq/oss/seq_oss_init.c @@ -281,13 +281,10 @@ snd_seq_oss_open(struct file *file, int level) return 0; _error: - snd_seq_oss_writeq_delete(dp->writeq); - snd_seq_oss_readq_delete(dp->readq); snd_seq_oss_synth_cleanup(dp); snd_seq_oss_midi_cleanup(dp); - delete_port(dp); delete_seq_queue(dp->queue); - kfree(dp); + delete_port(dp); return rc; } @@ -350,8 +347,10 @@ create_port(struct seq_oss_devinfo *dp) static int delete_port(struct seq_oss_devinfo *dp) { - if (dp->port < 0) + if (dp->port < 0) { + kfree(dp); return 0; + } debug_printk(("delete_port %i\n", dp->port)); return snd_seq_event_port_detach(dp->cseq, dp->port); diff --git a/sound/isa/msnd/msnd_pinnacle.c b/sound/isa/msnd/msnd_pinnacle.c index 5f3e684..91d6023 100644 --- a/sound/isa/msnd/msnd_pinnacle.c +++ b/sound/isa/msnd/msnd_pinnacle.c @@ -764,9 +764,9 @@ static long io[SNDRV_CARDS] = SNDRV_DEFAULT_PORT; static int irq[SNDRV_CARDS] = SNDRV_DEFAULT_IRQ; static long mem[SNDRV_CARDS] = SNDRV_DEFAULT_PORT; +#ifndef MSND_CLASSIC static long cfg[SNDRV_CARDS] = SNDRV_DEFAULT_PORT; -#ifndef MSND_CLASSIC /* Extra Peripheral Configuration (Default: Disable) */ static long ide_io0[SNDRV_CARDS] = SNDRV_DEFAULT_PORT; static long ide_io1[SNDRV_CARDS] = SNDRV_DEFAULT_PORT; @@ -894,7 +894,11 @@ static int __devinit snd_msnd_isa_probe(struct device *pdev, unsigned int idx) struct snd_card *card; struct snd_msnd *chip; - if (has_isapnp(idx) || cfg[idx] == SNDRV_AUTO_PORT) { + if (has_isapnp(idx) +#ifndef MSND_CLASSIC + || cfg[idx] == SNDRV_AUTO_PORT +#endif + ) { printk(KERN_INFO LOGNAME ": Assuming PnP mode\n"); return -ENODEV; } diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 3827092..1482921 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -4536,7 +4536,7 @@ int snd_hda_parse_pin_def_config(struct hda_codec *codec, cfg->hp_outs--; memmove(cfg->hp_pins + i, cfg->hp_pins + i + 1, sizeof(cfg->hp_pins[0]) * (cfg->hp_outs - i)); - memmove(sequences_hp + i - 1, sequences_hp + i, + memmove(sequences_hp + i, sequences_hp + i + 1, sizeof(sequences_hp[0]) * (cfg->hp_outs - i)); } } diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 1053fff..34940a0 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -126,6 +126,7 @@ MODULE_SUPPORTED_DEVICE("{{Intel, ICH6}," "{Intel, ICH10}," "{Intel, PCH}," "{Intel, CPT}," + "{Intel, PBG}," "{Intel, SCH}," "{ATI, SB450}," "{ATI, SB600}," @@ -2749,6 +2750,8 @@ static DEFINE_PCI_DEVICE_TABLE(azx_ids) = 
{ { PCI_DEVICE(0x8086, 0x3b57), .driver_data = AZX_DRIVER_ICH }, /* CPT */ { PCI_DEVICE(0x8086, 0x1c20), .driver_data = AZX_DRIVER_PCH }, + /* PBG */ + { PCI_DEVICE(0x8086, 0x1d20), .driver_data = AZX_DRIVER_PCH }, /* SCH */ { PCI_DEVICE(0x8086, 0x811b), .driver_data = AZX_DRIVER_SCH }, /* ATI SB 450/600 */ diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index 4ef5efa..488fd9a 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -972,6 +972,53 @@ static struct hda_verb cs_coef_init_verbs[] = { {} /* terminator */ }; +/* Errata: CS4207 rev C0/C1/C2 Silicon + * + * http://www.cirrus.com/en/pubs/errata/ER880C3.pdf + * + * 6. At high temperature (TA > +85°C), the digital supply current (IVD) + * may be excessive (up to an additional 200 μA), which is most easily + * observed while the part is being held in reset (RESET# active low). + * + * Root Cause: At initial powerup of the device, the logic that drives + * the clock and write enable to the S/PDIF SRC RAMs is not properly + * initialized. + * Certain random patterns will cause a steady leakage current in those + * RAM cells. The issue will resolve once the SRCs are used (turned on). + * + * Workaround: The following verb sequence briefly turns on the S/PDIF SRC + * blocks, which will alleviate the issue. + */ + +static struct hda_verb cs_errata_init_verbs[] = { + {0x01, AC_VERB_SET_POWER_STATE, 0x00}, /* AFG: D0 */ + {0x11, AC_VERB_SET_PROC_STATE, 0x01}, /* VPW: processing on */ + + {0x11, AC_VERB_SET_COEF_INDEX, 0x0008}, + {0x11, AC_VERB_SET_PROC_COEF, 0x9999}, + {0x11, AC_VERB_SET_COEF_INDEX, 0x0017}, + {0x11, AC_VERB_SET_PROC_COEF, 0xa412}, + {0x11, AC_VERB_SET_COEF_INDEX, 0x0001}, + {0x11, AC_VERB_SET_PROC_COEF, 0x0009}, + + {0x07, AC_VERB_SET_POWER_STATE, 0x00}, /* S/PDIF Rx: D0 */ + {0x08, AC_VERB_SET_POWER_STATE, 0x00}, /* S/PDIF Tx: D0 */ + + {0x11, AC_VERB_SET_COEF_INDEX, 0x0017}, + {0x11, AC_VERB_SET_PROC_COEF, 0x2412}, + {0x11, AC_VERB_SET_COEF_INDEX, 0x0008}, + {0x11, AC_VERB_SET_PROC_COEF, 0x0000}, + {0x11, AC_VERB_SET_COEF_INDEX, 0x0001}, + {0x11, AC_VERB_SET_PROC_COEF, 0x0008}, + {0x11, AC_VERB_SET_PROC_STATE, 0x00}, + + {0x07, AC_VERB_SET_POWER_STATE, 0x03}, /* S/PDIF Rx: D3 */ + {0x08, AC_VERB_SET_POWER_STATE, 0x03}, /* S/PDIF Tx: D3 */ + /*{0x01, AC_VERB_SET_POWER_STATE, 0x03},*/ /* AFG: D3 This is already handled */ + + {} /* terminator */ +}; + /* SPDIF setup */ static void init_digital(struct hda_codec *codec) { @@ -991,6 +1038,9 @@ static int cs_init(struct hda_codec *codec) { struct cs_spec *spec = codec->spec; + /* init_verb sequence for C0/C1/C2 errata*/ + snd_hda_sequence_write(codec, cs_errata_init_verbs); + snd_hda_sequence_write(codec, cs_coef_init_verbs); if (spec->gpio_mask) { diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 5cdb80e..972e7c4 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -116,6 +116,7 @@ struct conexant_spec { unsigned int dell_vostro:1; unsigned int ideapad:1; unsigned int thinkpad:1; + unsigned int hp_laptop:1; unsigned int ext_mic_present; unsigned int recording; @@ -2299,6 +2300,18 @@ static void cxt5066_ideapad_automic(struct hda_codec *codec) } } +/* toggle input of built-in digital mic and mic jack appropriately */ +static void cxt5066_hp_laptop_automic(struct hda_codec *codec) +{ + unsigned int present; + + present = snd_hda_jack_detect(codec, 0x1b); + snd_printdd("CXT5066: external microphone present=%d\n", present); + snd_hda_codec_write(codec, 0x17, 0, 
AC_VERB_SET_CONNECT_SEL, + present ? 1 : 3); +} + + /* toggle input of built-in digital mic and mic jack appropriately order is: external mic -> dock mic -> interal mic */ static void cxt5066_thinkpad_automic(struct hda_codec *codec) @@ -2408,6 +2421,20 @@ static void cxt5066_ideapad_event(struct hda_codec *codec, unsigned int res) } /* unsolicited event for jack sensing */ +static void cxt5066_hp_laptop_event(struct hda_codec *codec, unsigned int res) +{ + snd_printdd("CXT5066_hp_laptop: unsol event %x (%x)\n", res, res >> 26); + switch (res >> 26) { + case CONEXANT_HP_EVENT: + cxt5066_hp_automute(codec); + break; + case CONEXANT_MIC_EVENT: + cxt5066_hp_laptop_automic(codec); + break; + } +} + +/* unsolicited event for jack sensing */ static void cxt5066_thinkpad_event(struct hda_codec *codec, unsigned int res) { snd_printdd("CXT5066_thinkpad: unsol event %x (%x)\n", res, res >> 26); @@ -2989,6 +3016,14 @@ static struct hda_verb cxt5066_init_verbs_portd_lo[] = { { } /* end */ }; + +static struct hda_verb cxt5066_init_verbs_hp_laptop[] = { + {0x14, AC_VERB_SET_CONNECT_SEL, 0x0}, + {0x19, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | CONEXANT_HP_EVENT}, + {0x1b, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | CONEXANT_MIC_EVENT}, + { } /* end */ +}; + /* initialize jack-sensing, too */ static int cxt5066_init(struct hda_codec *codec) { @@ -3004,6 +3039,8 @@ static int cxt5066_init(struct hda_codec *codec) cxt5066_ideapad_automic(codec); else if (spec->thinkpad) cxt5066_thinkpad_automic(codec); + else if (spec->hp_laptop) + cxt5066_hp_laptop_automic(codec); } cxt5066_set_mic_boost(codec); return 0; @@ -3031,6 +3068,7 @@ enum { CXT5066_DELL_VOSTO, /* Dell Vostro 1015i */ CXT5066_IDEAPAD, /* Lenovo IdeaPad U150 */ CXT5066_THINKPAD, /* Lenovo ThinkPad T410s, others? 
*/ + CXT5066_HP_LAPTOP, /* HP Laptop */ CXT5066_MODELS }; @@ -3041,6 +3079,7 @@ static const char *cxt5066_models[CXT5066_MODELS] = { [CXT5066_DELL_VOSTO] = "dell-vostro", [CXT5066_IDEAPAD] = "ideapad", [CXT5066_THINKPAD] = "thinkpad", + [CXT5066_HP_LAPTOP] = "hp-laptop", }; static struct snd_pci_quirk cxt5066_cfg_tbl[] = { @@ -3052,8 +3091,11 @@ static struct snd_pci_quirk cxt5066_cfg_tbl[] = { SND_PCI_QUIRK(0x1028, 0x02d8, "Dell Vostro", CXT5066_DELL_VOSTO), SND_PCI_QUIRK(0x1028, 0x0402, "Dell Vostro", CXT5066_DELL_VOSTO), SND_PCI_QUIRK(0x1028, 0x0408, "Dell Inspiron One 19T", CXT5066_IDEAPAD), + SND_PCI_QUIRK(0x103c, 0x360b, "HP G60", CXT5066_HP_LAPTOP), + SND_PCI_QUIRK(0x1179, 0xff1e, "Toshiba Satellite C650D", CXT5066_IDEAPAD), SND_PCI_QUIRK(0x1179, 0xff50, "Toshiba Satellite P500-PSPGSC-01800T", CXT5066_OLPC_XO_1_5), SND_PCI_QUIRK(0x1179, 0xffe0, "Toshiba Satellite Pro T130-15F", CXT5066_OLPC_XO_1_5), + SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400s", CXT5066_THINKPAD), SND_PCI_QUIRK(0x17aa, 0x21b2, "Thinkpad X100e", CXT5066_IDEAPAD), SND_PCI_QUIRK(0x17aa, 0x21b3, "Thinkpad Edge 13 (197)", CXT5066_IDEAPAD), SND_PCI_QUIRK(0x17aa, 0x21b4, "Thinkpad Edge", CXT5066_IDEAPAD), @@ -3116,6 +3158,23 @@ static int patch_cxt5066(struct hda_codec *codec) spec->num_init_verbs++; spec->dell_automute = 1; break; + case CXT5066_HP_LAPTOP: + codec->patch_ops.init = cxt5066_init; + codec->patch_ops.unsol_event = cxt5066_hp_laptop_event; + spec->init_verbs[spec->num_init_verbs] = + cxt5066_init_verbs_hp_laptop; + spec->num_init_verbs++; + spec->hp_laptop = 1; + spec->mixers[spec->num_mixers++] = cxt5066_mixer_master; + spec->mixers[spec->num_mixers++] = cxt5066_mixers; + /* no S/PDIF out */ + spec->multiout.dig_out_nid = 0; + /* input source automatically selected */ + spec->input_mux = NULL; + spec->port_d_mode = 0; + spec->mic_boost = 3; /* default 30dB gain */ + break; + case CXT5066_OLPC_XO_1_5: codec->patch_ops.init = cxt5066_olpc_init; codec->patch_ops.unsol_event = cxt5066_olpc_unsol_event; diff --git a/sound/pci/hda/patch_nvhdmi.c b/sound/pci/hda/patch_nvhdmi.c index 69b950d..baa108b 100644 --- a/sound/pci/hda/patch_nvhdmi.c +++ b/sound/pci/hda/patch_nvhdmi.c @@ -84,7 +84,7 @@ static struct hda_verb nvhdmi_basic_init_7x[] = { #else /* support all rates and formats */ #define SUPPORTED_RATES \ - (SNDRV_PCM_RATE_22050 | SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000 |\ + (SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000 |\ SNDRV_PCM_RATE_88200 | SNDRV_PCM_RATE_96000 | SNDRV_PCM_RATE_176400 |\ SNDRV_PCM_RATE_192000) #define SUPPORTED_MAXBPS 24 diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 627bf99..a1312a6 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5334,6 +5334,7 @@ static void fillup_priv_adc_nids(struct hda_codec *codec, hda_nid_t *nids, static struct snd_pci_quirk beep_white_list[] = { SND_PCI_QUIRK(0x1043, 0x829f, "ASUS", 1), + SND_PCI_QUIRK(0x1043, 0x83ce, "EeePC", 1), SND_PCI_QUIRK(0x8086, 0xd613, "Intel", 1), {} }; @@ -14452,6 +14453,7 @@ static void alc269_auto_init(struct hda_codec *codec) enum { ALC269_FIXUP_SONY_VAIO, + ALC269_FIXUP_DELL_M101Z, }; static const struct hda_verb alc269_sony_vaio_fixup_verbs[] = { @@ -14463,11 +14465,20 @@ static const struct alc_fixup alc269_fixups[] = { [ALC269_FIXUP_SONY_VAIO] = { .verbs = alc269_sony_vaio_fixup_verbs }, + [ALC269_FIXUP_DELL_M101Z] = { + .verbs = (const struct hda_verb[]) { + /* Enables internal speaker */ + {0x20, AC_VERB_SET_COEF_INDEX, 13}, + {0x20, 
AC_VERB_SET_PROC_COEF, 0x4040}, + {} + } + }, }; static struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x104d, 0x9071, "Sony VAIO", ALC269_FIXUP_SONY_VAIO), SND_PCI_QUIRK(0x104d, 0x9077, "Sony VAIO", ALC269_FIXUP_SONY_VAIO), + SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z), {} }; diff --git a/sound/pci/oxygen/oxygen.h b/sound/pci/oxygen/oxygen.h index 6147216..a3409ed 100644 --- a/sound/pci/oxygen/oxygen.h +++ b/sound/pci/oxygen/oxygen.h @@ -155,6 +155,7 @@ void oxygen_pci_remove(struct pci_dev *pci); int oxygen_pci_suspend(struct pci_dev *pci, pm_message_t state); int oxygen_pci_resume(struct pci_dev *pci); #endif +void oxygen_pci_shutdown(struct pci_dev *pci); /* oxygen_mixer.c */ diff --git a/sound/pci/oxygen/oxygen_lib.c b/sound/pci/oxygen/oxygen_lib.c index fad03d6..7e93cf8 100644 --- a/sound/pci/oxygen/oxygen_lib.c +++ b/sound/pci/oxygen/oxygen_lib.c @@ -519,16 +519,21 @@ static void oxygen_init(struct oxygen *chip) } } -static void oxygen_card_free(struct snd_card *card) +static void oxygen_shutdown(struct oxygen *chip) { - struct oxygen *chip = card->private_data; - spin_lock_irq(&chip->reg_lock); chip->interrupt_mask = 0; chip->pcm_running = 0; oxygen_write16(chip, OXYGEN_DMA_STATUS, 0); oxygen_write16(chip, OXYGEN_INTERRUPT_MASK, 0); spin_unlock_irq(&chip->reg_lock); +} + +static void oxygen_card_free(struct snd_card *card) +{ + struct oxygen *chip = card->private_data; + + oxygen_shutdown(chip); if (chip->irq >= 0) free_irq(chip->irq, chip); flush_scheduled_work(); @@ -778,3 +783,13 @@ int oxygen_pci_resume(struct pci_dev *pci) } EXPORT_SYMBOL(oxygen_pci_resume); #endif /* CONFIG_PM */ + +void oxygen_pci_shutdown(struct pci_dev *pci) +{ + struct snd_card *card = pci_get_drvdata(pci); + struct oxygen *chip = card->private_data; + + oxygen_shutdown(chip); + chip->model.cleanup(chip); +} +EXPORT_SYMBOL(oxygen_pci_shutdown); diff --git a/sound/pci/oxygen/virtuoso.c b/sound/pci/oxygen/virtuoso.c index f03a2f2..06c863e 100644 --- a/sound/pci/oxygen/virtuoso.c +++ b/sound/pci/oxygen/virtuoso.c @@ -95,6 +95,7 @@ static struct pci_driver xonar_driver = { .suspend = oxygen_pci_suspend, .resume = oxygen_pci_resume, #endif + .shutdown = oxygen_pci_shutdown, }; static int __init alsa_card_xonar_init(void) diff --git a/sound/pci/oxygen/xonar_wm87x6.c b/sound/pci/oxygen/xonar_wm87x6.c index dbc4b89..b82c1cf 100644 --- a/sound/pci/oxygen/xonar_wm87x6.c +++ b/sound/pci/oxygen/xonar_wm87x6.c @@ -53,6 +53,8 @@ struct xonar_wm87x6 { struct xonar_generic generic; u16 wm8776_regs[0x17]; u16 wm8766_regs[0x10]; + struct snd_kcontrol *line_adcmux_control; + struct snd_kcontrol *mic_adcmux_control; struct snd_kcontrol *lc_controls[13]; }; @@ -193,6 +195,7 @@ static void xonar_ds_init(struct oxygen *chip) static void xonar_ds_cleanup(struct oxygen *chip) { xonar_disable_output(chip); + wm8776_write(chip, WM8776_RESET, 0); } static void xonar_ds_suspend(struct oxygen *chip) @@ -603,6 +606,7 @@ static int wm8776_input_mux_put(struct snd_kcontrol *ctl, { struct oxygen *chip = ctl->private_data; struct xonar_wm87x6 *data = chip->model_data; + struct snd_kcontrol *other_ctl; unsigned int mux_bit = ctl->private_value; u16 reg; int changed; @@ -610,8 +614,18 @@ static int wm8776_input_mux_put(struct snd_kcontrol *ctl, mutex_lock(&chip->mutex); reg = data->wm8776_regs[WM8776_ADCMUX]; if (value->value.integer.value[0]) { - reg &= ~0x003; reg |= mux_bit; + /* line-in and mic-in are exclusive */ + mux_bit ^= 3; + if (reg & mux_bit) { + reg &= ~mux_bit; + if (mux_bit == 1) + other_ctl 
= data->line_adcmux_control; + else + other_ctl = data->mic_adcmux_control; + snd_ctl_notify(chip->card, SNDRV_CTL_EVENT_MASK_VALUE, + &other_ctl->id); + } } else reg &= ~mux_bit; changed = reg != data->wm8776_regs[WM8776_ADCMUX]; @@ -963,7 +977,13 @@ static int xonar_ds_mixer_init(struct oxygen *chip) err = snd_ctl_add(chip->card, ctl); if (err < 0) return err; + if (!strcmp(ctl->id.name, "Line Capture Switch")) + data->line_adcmux_control = ctl; + else if (!strcmp(ctl->id.name, "Mic Capture Switch")) + data->mic_adcmux_control = ctl; } + if (!data->line_adcmux_control || !data->mic_adcmux_control) + return -ENXIO; BUILD_BUG_ON(ARRAY_SIZE(lc_controls) != ARRAY_SIZE(data->lc_controls)); for (i = 0; i < ARRAY_SIZE(lc_controls); ++i) { ctl = snd_ctl_new1(&lc_controls[i], chip); diff --git a/sound/ppc/snd_ps3.c b/sound/ppc/snd_ps3.c index 2f12da4..581a670 100644 --- a/sound/ppc/snd_ps3.c +++ b/sound/ppc/snd_ps3.c @@ -579,7 +579,7 @@ static int snd_ps3_delay_to_bytes(struct snd_pcm_substream *substream, rate * delay_ms / 1000) * substream->runtime->channels; - pr_debug(KERN_ERR "%s: time=%d rate=%d bytes=%ld, frames=%d, ret=%d\n", + pr_debug("%s: time=%d rate=%d bytes=%ld, frames=%d, ret=%d\n", __func__, delay_ms, rate, diff --git a/sound/soc/s3c24xx/s3c-dma.c b/sound/soc/s3c24xx/s3c-dma.c index 1b61c23..f1b1bc4 100644 --- a/sound/soc/s3c24xx/s3c-dma.c +++ b/sound/soc/s3c24xx/s3c-dma.c @@ -94,8 +94,7 @@ static void s3c_dma_enqueue(struct snd_pcm_substream *substream) if ((pos + len) > prtd->dma_end) { len = prtd->dma_end - pos; - pr_debug(KERN_DEBUG "%s: corrected dma len %ld\n", - __func__, len); + pr_debug("%s: corrected dma len %ld\n", __func__, len); } ret = s3c2410_dma_enqueue(prtd->params->channel, diff --git a/sound/usb/card.c b/sound/usb/card.c index 9feb00c..4eabafa 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -126,7 +126,7 @@ static void snd_usb_stream_disconnect(struct list_head *head) for (idx = 0; idx < 2; idx++) { subs = &as->substream[idx]; if (!subs->num_formats) - return; + continue; snd_usb_release_substream_urbs(subs, 1); subs->interface = -1; } @@ -216,6 +216,11 @@ static int snd_usb_create_streams(struct snd_usb_audio *chip, int ctrlif) } switch (protocol) { + default: + snd_printdd(KERN_WARNING "unknown interface protocol %#02x, assuming v1\n", + protocol); + /* fall through */ + case UAC_VERSION_1: { struct uac1_ac_header_descriptor *h1 = control_header; @@ -253,10 +258,6 @@ static int snd_usb_create_streams(struct snd_usb_audio *chip, int ctrlif) break; } - - default: - snd_printk(KERN_ERR "unknown protocol version 0x%02x\n", protocol); - return -EINVAL; } return 0; @@ -465,7 +466,13 @@ static void *snd_usb_audio_probe(struct usb_device *dev, goto __error; } - chip->ctrl_intf = alts; + /* + * For devices with more than one control interface, we assume the + * first contains the audio controls. We might need a more specific + * check here in the future. 
+ */ + if (!chip->ctrl_intf) + chip->ctrl_intf = alts; if (err > 0) { /* create normal USB audio interfaces */ diff --git a/sound/usb/clock.c b/sound/usb/clock.c index b853f8d..7754a10 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -295,12 +295,11 @@ int snd_usb_init_sample_rate(struct snd_usb_audio *chip, int iface, switch (altsd->bInterfaceProtocol) { case UAC_VERSION_1: + default: return set_sample_rate_v1(chip, iface, alts, fmt, rate); case UAC_VERSION_2: return set_sample_rate_v2(chip, iface, alts, fmt, rate); } - - return -EINVAL; } diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index 1a701f1..ef0a07e 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -275,6 +275,12 @@ int snd_usb_parse_audio_endpoints(struct snd_usb_audio *chip, int iface_no) /* get audio formats */ switch (protocol) { + default: + snd_printdd(KERN_WARNING "%d:%u:%d: unknown interface protocol %#02x, assuming v1\n", + dev->devnum, iface_no, altno, protocol); + protocol = UAC_VERSION_1; + /* fall through */ + case UAC_VERSION_1: { struct uac1_as_header_descriptor *as = snd_usb_find_csint_desc(alts->extra, alts->extralen, NULL, UAC_AS_GENERAL); @@ -336,11 +342,6 @@ int snd_usb_parse_audio_endpoints(struct snd_usb_audio *chip, int iface_no) dev->devnum, iface_no, altno, as->bTerminalLink); continue; } - - default: - snd_printk(KERN_ERR "%d:%u:%d : unknown interface protocol %04x\n", - dev->devnum, iface_no, altno, protocol); - continue; } /* get format type */ diff --git a/sound/usb/format.c b/sound/usb/format.c index 3a13754..6914821 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -49,7 +49,8 @@ static u64 parse_audio_format_i_type(struct snd_usb_audio *chip, u64 pcm_formats; switch (protocol) { - case UAC_VERSION_1: { + case UAC_VERSION_1: + default: { struct uac_format_type_i_discrete_descriptor *fmt = _fmt; sample_width = fmt->bBitResolution; sample_bytes = fmt->bSubframeSize; @@ -64,9 +65,6 @@ static u64 parse_audio_format_i_type(struct snd_usb_audio *chip, format <<= 1; break; } - - default: - return -EINVAL; } pcm_formats = 0; @@ -384,6 +382,10 @@ static int parse_audio_format_i(struct snd_usb_audio *chip, * audio class v2 uses class specific EP0 range requests for that. 
*/ switch (protocol) { + default: + snd_printdd(KERN_WARNING "%d:%u:%d : invalid protocol version %d, assuming v1\n", + chip->dev->devnum, fp->iface, fp->altsetting, protocol); + /* fall through */ case UAC_VERSION_1: fp->channels = fmt->bNrChannels; ret = parse_audio_format_rates_v1(chip, fp, (unsigned char *) fmt, 7); @@ -392,10 +394,6 @@ static int parse_audio_format_i(struct snd_usb_audio *chip, /* fp->channels is already set in this case */ ret = parse_audio_format_rates_v2(chip, fp); break; - default: - snd_printk(KERN_ERR "%d:%u:%d : invalid protocol version %d\n", - chip->dev->devnum, fp->iface, fp->altsetting, protocol); - return -EINVAL; } if (fp->channels < 1) { @@ -438,6 +436,10 @@ static int parse_audio_format_ii(struct snd_usb_audio *chip, fp->channels = 1; switch (protocol) { + default: + snd_printdd(KERN_WARNING "%d:%u:%d : invalid protocol version %d, assuming v1\n", + chip->dev->devnum, fp->iface, fp->altsetting, protocol); + /* fall through */ case UAC_VERSION_1: { struct uac_format_type_ii_discrete_descriptor *fmt = _fmt; brate = le16_to_cpu(fmt->wMaxBitRate); @@ -456,10 +458,6 @@ static int parse_audio_format_ii(struct snd_usb_audio *chip, ret = parse_audio_format_rates_v2(chip, fp); break; } - default: - snd_printk(KERN_ERR "%d:%u:%d : invalid protocol version %d\n", - chip->dev->devnum, fp->iface, fp->altsetting, protocol); - return -EINVAL; } return ret; diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index c166db0..3ed3901 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -2175,7 +2175,15 @@ int snd_usb_create_mixer(struct snd_usb_audio *chip, int ctrlif, } host_iface = &usb_ifnum_to_if(chip->dev, ctrlif)->altsetting[0]; - mixer->protocol = get_iface_desc(host_iface)->bInterfaceProtocol; + switch (get_iface_desc(host_iface)->bInterfaceProtocol) { + case UAC_VERSION_1: + default: + mixer->protocol = UAC_VERSION_1; + break; + case UAC_VERSION_2: + mixer->protocol = UAC_VERSION_2; + break; + } if ((err = snd_usb_mixer_controls(mixer)) < 0 || (err = snd_usb_mixer_status_create(mixer)) < 0) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 3634ced..3b5135c 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -173,13 +173,12 @@ int snd_usb_init_pitch(struct snd_usb_audio *chip, int iface, switch (altsd->bInterfaceProtocol) { case UAC_VERSION_1: + default: return init_pitch_v1(chip, iface, alts, fmt); case UAC_VERSION_2: return init_pitch_v2(chip, iface, alts, fmt); } - - return -EINVAL; } /* diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 624a96c..6de4313 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -50,6 +50,7 @@ static inline void callchain_init(struct callchain_node *node) INIT_LIST_HEAD(&node->children); INIT_LIST_HEAD(&node->val); + node->children_hit = 0; node->parent = NULL; node->hit = 0; } diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index e72f05c..fcc16e4 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1539,6 +1539,7 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev, goto error; } tev->point.offset = pev->point.offset; + tev->point.retprobe = pev->point.retprobe; tev->nargs = pev->nargs; if (tev->nargs) { tev->args = zalloc(sizeof(struct probe_trace_arg) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 5251366..32b81f7 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -686,6 +686,25 @@ static int find_variable(Dwarf_Die *sp_die, 
struct probe_finder *pf) char buf[32], *ptr; int ret, nscopes; + if (!is_c_varname(pf->pvar->var)) { + /* Copy raw parameters */ + pf->tvar->value = strdup(pf->pvar->var); + if (pf->tvar->value == NULL) + return -ENOMEM; + if (pf->pvar->type) { + pf->tvar->type = strdup(pf->pvar->type); + if (pf->tvar->type == NULL) + return -ENOMEM; + } + if (pf->pvar->name) { + pf->tvar->name = strdup(pf->pvar->name); + if (pf->tvar->name == NULL) + return -ENOMEM; + } else + pf->tvar->name = NULL; + return 0; + } + if (pf->pvar->name) pf->tvar->name = strdup(pf->pvar->name); else { @@ -700,19 +719,6 @@ static int find_variable(Dwarf_Die *sp_die, struct probe_finder *pf) if (pf->tvar->name == NULL) return -ENOMEM; - if (!is_c_varname(pf->pvar->var)) { - /* Copy raw parameters */ - pf->tvar->value = strdup(pf->pvar->var); - if (pf->tvar->value == NULL) - return -ENOMEM; - if (pf->pvar->type) { - pf->tvar->type = strdup(pf->pvar->type); - if (pf->tvar->type == NULL) - return -ENOMEM; - } - return 0; - } - pr_debug("Searching '%s' variable in context.\n", pf->pvar->var); /* Search child die for local variables and parameters. */ @@ -783,6 +789,16 @@ static int convert_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf) /* This function has no name. */ tev->point.offset = (unsigned long)pf->addr; + /* Return probe must be on the head of a subprogram */ + if (pf->pev->point.retprobe) { + if (tev->point.offset != 0) { + pr_warning("Return probe must be on the head of" + " a real function\n"); + return -EINVAL; + } + tev->point.retprobe = true; + } + pr_debug("Probe point found: %s+%lu\n", tev->point.symbol, tev->point.offset); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 1a36773..b2f5ae9 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -2268,6 +2268,9 @@ static int setup_list(struct strlist **list, const char *list_str, int symbol__init(void) { + if (symbol_conf.initialized) + return 0; + elf_version(EV_CURRENT); if (symbol_conf.sort_by_name) symbol_conf.priv_size += (sizeof(struct symbol_name_rb_node) - @@ -2293,6 +2296,7 @@ int symbol__init(void) symbol_conf.sym_list_str, "symbol") < 0) goto out_free_comm_list; + symbol_conf.initialized = true; return 0; out_free_dso_list: @@ -2304,11 +2308,14 @@ out_free_comm_list: void symbol__exit(void) { + if (!symbol_conf.initialized) + return; strlist__delete(symbol_conf.sym_list); strlist__delete(symbol_conf.dso_list); strlist__delete(symbol_conf.comm_list); vmlinux_path__exit(); symbol_conf.sym_list = symbol_conf.dso_list = symbol_conf.comm_list = NULL; + symbol_conf.initialized = false; } int machines__create_kernel_maps(struct rb_root *self, pid_t pid) diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index b7a8da4..ea95c27 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -69,7 +69,8 @@ struct symbol_conf { show_nr_samples, use_callchain, exclude_other, - show_cpu_utilization; + show_cpu_utilization, + initialized; const char *vmlinux_name, *source_prefix, *field_sep; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b78b794..d4853a5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1958,10 +1958,10 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, cpu); hardware_disable(NULL); break; - case CPU_ONLINE: + case CPU_STARTING: printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", cpu); - smp_call_function_single(cpu, hardware_enable, NULL, 1); + hardware_enable(NULL); break; } return NOTIFY_OK; @@ -2096,7 +2096,6 @@ 
int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, static struct notifier_block kvm_cpu_notifier = { .notifier_call = kvm_cpu_hotplug, - .priority = 20, /* must be > scheduler priority */ }; static int vm_stat_get(void *_offset, u64 *val)
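
A note on the final kvm_main.c hunk, placed after the diff so it does not affect patch application: switching the hotplug notifier from CPU_ONLINE to CPU_STARTING lets the callback run on the CPU that is coming up, with interrupts disabled and before the scheduler can place tasks there. That is why the smp_call_function_single() cross-call becomes a direct hardware_enable(NULL) call, and why the ".priority = 20" workaround (beating the scheduler's notifier) can be dropped. The sketch below illustrates that notifier pattern under 2.6.3x-era APIs; all "demo" names are hypothetical, and the pr_info() body merely stands in for KVM's hardware_enable():

#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/smp.h>

/* For CPU_STARTING this runs on the incoming CPU itself, so no IPI needed. */
static int demo_cpu_callback(struct notifier_block *nb,
			     unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_STARTING:
		/* Enable per-CPU state here, as the patched KVM code does. */
		pr_info("demo: CPU%u starting (notifier on CPU%u)\n",
			cpu, raw_smp_processor_id());
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block demo_cpu_notifier = {
	.notifier_call = demo_cpu_callback,
};

static int __init demo_init(void)
{
	register_cpu_notifier(&demo_cpu_notifier);
	return 0;
}

static void __exit demo_exit(void)
{
	unregister_cpu_notifier(&demo_cpu_notifier);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");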