diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 4ce34fa..a4f9133 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -40,6 +40,7 @@ typedef int (congested_fn)(void *, int);
 enum bdi_stat_item {
         BDI_RECLAIMABLE,
         BDI_WRITEBACK,
+        BDI_WRITTEN,
         NR_BDI_STAT_ITEMS
 };
 
@@ -88,6 +89,15 @@ struct backing_dev_info {
 
         struct timer_list laptop_mode_wb_timer;
 
+        spinlock_t balance_lock;          /* lock protecting entries below */
+        struct list_head balance_list;    /* waiters in balance_dirty_pages */
+        unsigned int balance_waiters;     /* number of waiters in the list */
+        struct delayed_work balance_work; /* work distributing page
+                                             completions among waiters */
+        unsigned long written_start;      /* BDI_WRITTEN last time we scanned balance_list */
+        unsigned long start_jiffies;      /* time when we last scanned list */
+        unsigned long pages_per_s;        /* estimated throughput of bdi */
+
 #ifdef CONFIG_DEBUG_FS
         struct dentry *debug_dir;
         struct dentry *debug_stats;
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 0ead399..901c33f 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -129,6 +129,7 @@ unsigned long bdi_dirty_limit(struct backing_dev_info *bdi,
                                unsigned long dirty);
 
 void page_writeback_init(void);
+void distribute_page_completions(struct work_struct *work);
 void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
                                         unsigned long nr_pages_dirtied);
 
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 4e249b9..00b06a2 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -147,11 +147,95 @@ DEFINE_EVENT(wbc_class, name, \
 DEFINE_WBC_EVENT(wbc_writeback_start);
 DEFINE_WBC_EVENT(wbc_writeback_written);
 DEFINE_WBC_EVENT(wbc_writeback_wait);
-DEFINE_WBC_EVENT(wbc_balance_dirty_start);
-DEFINE_WBC_EVENT(wbc_balance_dirty_written);
-DEFINE_WBC_EVENT(wbc_balance_dirty_wait);
 DEFINE_WBC_EVENT(wbc_writepage);
 
+TRACE_EVENT(writeback_balance_dirty_pages_waiting,
+        TP_PROTO(struct backing_dev_info *bdi, unsigned long pages),
+        TP_ARGS(bdi, pages),
+        TP_STRUCT__entry(
+                __array(char, name, 32)
+                __field(unsigned long, pages)
+        ),
+        TP_fast_assign(
+                strncpy(__entry->name, dev_name(bdi->dev), 32);
+                __entry->pages = pages;
+        ),
+        TP_printk("bdi=%s pages=%lu",
+                  __entry->name, __entry->pages
+        )
+);
+
+TRACE_EVENT(writeback_balance_dirty_pages_woken,
+        TP_PROTO(struct backing_dev_info *bdi),
+        TP_ARGS(bdi),
+        TP_STRUCT__entry(
+                __array(char, name, 32)
+        ),
+        TP_fast_assign(
+                strncpy(__entry->name, dev_name(bdi->dev), 32);
+        ),
+        TP_printk("bdi=%s",
+                  __entry->name
+        )
+);
+
+TRACE_EVENT(writeback_distribute_page_completions,
+        TP_PROTO(struct backing_dev_info *bdi, unsigned long written),
+        TP_ARGS(bdi, written),
+        TP_STRUCT__entry(
+                __array(char, name, 32)
+                __field(unsigned long, start)
+                __field(unsigned long, written)
+        ),
+        TP_fast_assign(
+                strncpy(__entry->name, dev_name(bdi->dev), 32);
+                __entry->start = bdi->written_start;
+                __entry->written = written - bdi->written_start;
+        ),
+        TP_printk("bdi=%s written_start=%lu to_distribute=%lu",
+                  __entry->name, __entry->start, __entry->written
+        )
+);
+
+TRACE_EVENT(writeback_distribute_page_completions_wakeall,
+        TP_PROTO(struct backing_dev_info *bdi),
+        TP_ARGS(bdi),
+        TP_STRUCT__entry(
+                __array(char, name, 32)
+        ),
+        TP_fast_assign(
+                strncpy(__entry->name, dev_name(bdi->dev), 32);
+        ),
+        TP_printk("bdi=%s",
+                  __entry->name
+        )
+);
+
+TRACE_EVENT(writeback_distribute_page_completions_scheduled,
+        TP_PROTO(struct backing_dev_info *bdi, unsigned long nap,
+                 unsigned long pages),
+        TP_ARGS(bdi, nap, pages),
+        TP_STRUCT__entry(
+                __array(char, name, 32)
+                __field(unsigned long, nap)
+                __field(unsigned long, pages)
+                __field(unsigned long, waiters)
+                __field(unsigned long, pages_per_s)
+        ),
+        TP_fast_assign(
+                strncpy(__entry->name, dev_name(bdi->dev), 32);
+                __entry->nap = nap;
+                __entry->pages = pages;
+                __entry->waiters = bdi->balance_waiters;
+                __entry->pages_per_s = bdi->pages_per_s;
+        ),
+        TP_printk("bdi=%s sleep=%u ms want_pages=%lu waiters=%lu"
+                  " pages_per_s=%lu",
+                  __entry->name, jiffies_to_msecs(__entry->nap),
+                  __entry->pages, __entry->waiters, __entry->pages_per_s
+        )
+);
+
 DECLARE_EVENT_CLASS(writeback_congest_waited_template,
 
         TP_PROTO(unsigned int usec_timeout, unsigned int usec_delayed),
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 027100d..e2cbe5c 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -92,6 +92,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
                    "BdiDirtyThresh:   %8lu kB\n"
                    "DirtyThresh:      %8lu kB\n"
                    "BackgroundThresh: %8lu kB\n"
+                   "BdiWritten:       %8lu kB\n"
                    "b_dirty:          %8lu\n"
                    "b_io:             %8lu\n"
                    "b_more_io:        %8lu\n"
@@ -99,8 +100,9 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
                    "state:            %8lx\n",
                    (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
                    (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
-                   K(bdi_thresh), K(dirty_thresh),
-                   K(background_thresh), nr_dirty, nr_io, nr_more_io,
+                   K(bdi_thresh), K(dirty_thresh), K(background_thresh),
+                   (unsigned long) K(bdi_stat(bdi, BDI_WRITTEN)),
+                   nr_dirty, nr_io, nr_more_io,
                    !list_empty(&bdi->bdi_list), bdi->state);
 #undef K
 
@@ -650,6 +652,14 @@ int bdi_init(struct backing_dev_info *bdi)
         INIT_LIST_HEAD(&bdi->bdi_list);
         INIT_LIST_HEAD(&bdi->work_list);
 
+        spin_lock_init(&bdi->balance_lock);
+        INIT_LIST_HEAD(&bdi->balance_list);
+        bdi->written_start = 0;
+        bdi->start_jiffies = 0;
+        bdi->balance_waiters = 0;
+        INIT_DELAYED_WORK(&bdi->balance_work, distribute_page_completions);
+        bdi->pages_per_s = 1;
+
         bdi_wb_init(&bdi->wb, bdi);
 
         for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
@@ -689,6 +699,8 @@ void bdi_destroy(struct backing_dev_info *bdi)
                 spin_unlock(&inode_lock);
         }
 
+        cancel_delayed_work_sync(&bdi->balance_work);
+        WARN_ON(!list_empty(&bdi->balance_list));
         bdi_unregister(bdi);
 
         for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 2cb01f6..09f1adf 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -43,16 +43,11 @@ static long ratelimit_pages = 32;
 
 /*
- * When balance_dirty_pages decides that the caller needs to perform some
- * non-background writeback, this is how many pages it will attempt to write.
- * It should be somewhat larger than dirtied pages to ensure that reasonably
- * large amounts of I/O are submitted.
+ * When balance_dirty_pages decides that the caller needs to wait for some
+ * writeback to happen, this is how many pages it will attempt to write.
  */
 static inline long sync_writeback_pages(unsigned long dirtied)
 {
-        if (dirtied < ratelimit_pages)
-                dirtied = ratelimit_pages;
-
         return dirtied + dirtied / 2;
 }
 
 /* The following parameters are exported via /proc/sys/vm */
@@ -132,6 +127,17 @@ static struct prop_descriptor vm_completions;
 static struct prop_descriptor vm_dirties;
 
 /*
+ * Item a process queues to bdi list in balance_dirty_pages() when it gets
+ * throttled
+ */
+struct balance_waiter {
+        struct list_head bw_list;
+        unsigned long bw_wait_pages;    /* Number of pages to wait for to
+                                           get written */
+        struct task_struct *bw_task;    /* Task waiting for IO */
+};
+
+/*
  * couple the period to the dirty_ratio:
  *
  * period/2 ~ roundup_pow_of_two(dirty limit)
@@ -219,6 +225,7 @@ int dirty_bytes_handler(struct ctl_table *table, int write,
  */
 static inline void __bdi_writeout_inc(struct backing_dev_info *bdi)
 {
+        __inc_bdi_stat(bdi, BDI_WRITTEN);
         __prop_inc_percpu_max(&vm_completions, &bdi->completions,
                               bdi->max_prop_frac);
 }
@@ -274,12 +281,13 @@ static inline void task_dirties_fraction(struct task_struct *tsk,
  * effectively curb the growth of dirty pages. Light dirtiers with high enough
  * dirty threshold may never get throttled.
  */
+#define TASK_LIMIT_FRACTION 8
 static unsigned long task_dirty_limit(struct task_struct *tsk,
                                        unsigned long bdi_dirty)
 {
         long numerator, denominator;
         unsigned long dirty = bdi_dirty;
-        u64 inv = dirty >> 3;
+        u64 inv = dirty / TASK_LIMIT_FRACTION;
 
         task_dirties_fraction(tsk, &numerator, &denominator);
         inv *= numerator;
@@ -290,6 +298,12 @@ static unsigned long task_dirty_limit(struct task_struct *tsk,
         return max(dirty, bdi_dirty/2);
 }
 
+/* Minimum limit for any task */
+static unsigned long task_min_dirty_limit(unsigned long bdi_dirty)
+{
+        return bdi_dirty - bdi_dirty / TASK_LIMIT_FRACTION;
+}
+
 /*
  *
  */
@@ -468,133 +482,312 @@ unsigned long bdi_dirty_limit(struct backing_dev_info *bdi, unsigned long dirty)
         return bdi_dirty;
 }
 
-/*
- * balance_dirty_pages() must be called by processes which are generating dirty
- * data.  It looks at the number of dirty pages in the machine and will force
- * the caller to perform writeback if the system is over `vm_dirty_ratio'.
- * If we're over `background_thresh' then the writeback threads are woken to
- * perform some writeout.
- */
-static void balance_dirty_pages(struct address_space *mapping,
-                                unsigned long write_chunk)
-{
-        long nr_reclaimable, bdi_nr_reclaimable;
-        long nr_writeback, bdi_nr_writeback;
+struct dirty_limit_state {
+        long nr_reclaimable;
+        long nr_writeback;
+        long bdi_nr_reclaimable;
+        long bdi_nr_writeback;
         unsigned long background_thresh;
         unsigned long dirty_thresh;
         unsigned long bdi_thresh;
-        unsigned long pages_written = 0;
-        unsigned long pause = 1;
-        bool dirty_exceeded = false;
-        struct backing_dev_info *bdi = mapping->backing_dev_info;
+};
 
-        for (;;) {
-                struct writeback_control wbc = {
-                        .sync_mode      = WB_SYNC_NONE,
-                        .older_than_this = NULL,
-                        .nr_to_write    = write_chunk,
-                        .range_cyclic   = 1,
-                };
+static void get_global_dirty_limit_state(struct dirty_limit_state *st)
+{
+        /*
+         * Note: nr_reclaimable denotes nr_dirty + nr_unstable. Unstable
+         * writes are a feature of certain networked filesystems (i.e. NFS) in
+         * which data may have been written to the server's write cache, but
+         * has not yet been flushed to permanent storage.
+         */
+        st->nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
+                                global_page_state(NR_UNSTABLE_NFS);
+        st->nr_writeback = global_page_state(NR_WRITEBACK);
 
-                nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
-                                        global_page_state(NR_UNSTABLE_NFS);
-                nr_writeback = global_page_state(NR_WRITEBACK);
+        global_dirty_limits(&st->background_thresh, &st->dirty_thresh);
+}
 
-                global_dirty_limits(&background_thresh, &dirty_thresh);
+/* This function expects global state to be already filled in! */
+static void get_bdi_dirty_limit_state(struct backing_dev_info *bdi,
+                                      struct dirty_limit_state *st)
+{
+        unsigned long min_bdi_thresh;
 
-                /*
-                 * Throttle it only when the background writeback cannot
-                 * catch-up. This avoids (excessively) small writeouts
-                 * when the bdi limits are ramping up.
-                 */
-                if (nr_reclaimable + nr_writeback <=
-                                (background_thresh + dirty_thresh) / 2)
-                        break;
+        st->bdi_thresh = bdi_dirty_limit(bdi, st->dirty_thresh);
+        min_bdi_thresh = task_min_dirty_limit(st->bdi_thresh);
+        /*
+         * In order to avoid the stacked BDI deadlock we need to ensure we
+         * accurately count the 'dirty' pages when the threshold is low.
+         *
+         * Otherwise it would be possible to get thresh+n pages reported dirty,
+         * even though there are thresh-m pages actually dirty; with m+n
+         * sitting in the percpu deltas.
+         */
+        if (min_bdi_thresh < 2*bdi_stat_error(bdi)) {
+                st->bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE);
+                st->bdi_nr_writeback = bdi_stat_sum(bdi, BDI_WRITEBACK);
+        } else {
+                st->bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
+                st->bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
+        }
+}
 
-                bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
-                bdi_thresh = task_dirty_limit(current, bdi_thresh);
+/* Possible states of dirty memory for BDI */
+enum {
+        DIRTY_OK,                       /* Everything below limit */
+        DIRTY_EXCEED_BACKGROUND,        /* Background writeback limit exceeded */
+        DIRTY_MAY_EXCEED_LIMIT,         /* Some task may exceed its dirty limit */
+        DIRTY_EXCEED_LIMIT,             /* Global dirty limit exceeded */
+};
 
-                /*
-                 * In order to avoid the stacked BDI deadlock we need
-                 * to ensure we accurately count the 'dirty' pages when
-                 * the threshold is low.
-                 *
-                 * Otherwise it would be possible to get thresh+n pages
-                 * reported dirty, even though there are thresh-m pages
-                 * actually dirty; with m+n sitting in the percpu
-                 * deltas.
-                 */
-                if (bdi_thresh < 2*bdi_stat_error(bdi)) {
-                        bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE);
-                        bdi_nr_writeback = bdi_stat_sum(bdi, BDI_WRITEBACK);
-                } else {
-                        bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
-                        bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
-                }
+static int check_dirty_limits(struct backing_dev_info *bdi,
+                              struct dirty_limit_state *st)
+{
+        unsigned long min_bdi_thresh;
+        int ret = DIRTY_OK;
 
-                /*
-                 * The bdi thresh is somehow "soft" limit derived from the
-                 * global "hard" limit. The former helps to prevent heavy IO
-                 * bdi or process from holding back light ones; The latter is
-                 * the last resort safeguard.
-                 */
-                dirty_exceeded =
-                        (bdi_nr_reclaimable + bdi_nr_writeback > bdi_thresh)
-                        || (nr_reclaimable + nr_writeback > dirty_thresh);
+        get_global_dirty_limit_state(st);
+        /*
+         * Throttle it only when the background writeback cannot catch-up. This
+         * avoids (excessively) small writeouts when the bdi limits are ramping
+         * up.
+         */
+        if (st->nr_reclaimable + st->nr_writeback <=
+            (st->background_thresh + st->dirty_thresh) / 2)
+                goto out;
 
-                if (!dirty_exceeded)
-                        break;
+        get_bdi_dirty_limit_state(bdi, st);
+        min_bdi_thresh = task_min_dirty_limit(st->bdi_thresh);
 
-                if (!bdi->dirty_exceeded)
-                        bdi->dirty_exceeded = 1;
-
-                /* Note: nr_reclaimable denotes nr_dirty + nr_unstable.
-                 * Unstable writes are a feature of certain networked
-                 * filesystems (i.e. NFS) in which data may have been
-                 * written to the server's write cache, but has not yet
-                 * been flushed to permanent storage.
-                 * Only move pages to writeback if this bdi is over its
-                 * threshold otherwise wait until the disk writes catch
-                 * up.
-                 */
-                trace_wbc_balance_dirty_start(&wbc, bdi);
-                if (bdi_nr_reclaimable > bdi_thresh) {
-                        writeback_inodes_wb(&bdi->wb, &wbc);
-                        pages_written += write_chunk - wbc.nr_to_write;
-                        trace_wbc_balance_dirty_written(&wbc, bdi);
-                        if (pages_written >= write_chunk)
-                                break;          /* We've done our duty */
+        /*
+         * The bdi thresh is somehow "soft" limit derived from the global
+         * "hard" limit. The former helps to prevent heavy IO bdi or process
+         * from holding back light ones; The latter is the last resort
+         * safeguard.
+         */
+        if (st->nr_reclaimable + st->nr_writeback > st->dirty_thresh) {
+                ret = DIRTY_EXCEED_LIMIT;
+                goto out;
+        }
+        if (st->bdi_nr_reclaimable + st->bdi_nr_writeback > min_bdi_thresh) {
+                ret = DIRTY_MAY_EXCEED_LIMIT;
+                goto out;
+        }
+        if (st->nr_reclaimable > st->background_thresh)
+                ret = DIRTY_EXCEED_BACKGROUND;
+out:
+        return ret;
+}
+
+static bool bdi_task_limit_exceeded(struct dirty_limit_state *st,
+                                    struct task_struct *p)
+{
+        unsigned long bdi_thresh;
+
+        bdi_thresh = task_dirty_limit(p, st->bdi_thresh);
+
+        return st->bdi_nr_reclaimable + st->bdi_nr_writeback > bdi_thresh;
+}
+
+static void balance_waiter_done(struct backing_dev_info *bdi,
+                                struct balance_waiter *bw)
+{
+        list_del_init(&bw->bw_list);
+        bdi->balance_waiters--;
+        wake_up_process(bw->bw_task);
+}
+
+static unsigned long compute_distribute_time(struct backing_dev_info *bdi,
+                                             unsigned long min_pages)
+{
+        unsigned long nap;
+
+        /*
+         * Because of round-robin distribution, every waiter has to get at
+         * least min_pages pages.
+         */
+        min_pages *= bdi->balance_waiters;
+        nap = msecs_to_jiffies(
+                ((u64)min_pages) * MSEC_PER_SEC / bdi->pages_per_s);
+        /*
+         * Force computed sleep time to be in interval (HZ/50..HZ/4)
+         * so that we
+         * a) don't wake too often and burn too much CPU
+         * b) check dirty limits at least once in a while
+         */
+        nap = max_t(unsigned long, HZ/50, nap);
+        nap = min_t(unsigned long, HZ/4, nap);
+        trace_writeback_distribute_page_completions_scheduled(bdi, nap,
+                                                              min_pages);
+        return nap;
+}
+
+/*
+ * When the throughput is computed, we consider an imaginary WINDOW_MS
+ * milliseconds long window. In this window, we know that it took 'deltams'
+ * milliseconds to write 'written' pages and for the rest of the window we
+ * assume the number of pages corresponding to the previously computed
+ * throughput to have been written. Thus we obtain the total number of pages
+ * written in the imaginary window and from it the new throughput.
+ */
+#define WINDOW_MS 10000
+
+static void update_bdi_throughput(struct backing_dev_info *bdi,
+                                  unsigned long written, unsigned long time)
+{
+        unsigned int deltams = jiffies_to_msecs(time - bdi->start_jiffies);
+
+        written -= bdi->written_start;
+        if (deltams > WINDOW_MS) {
+                /* Add 1 to avoid 0 result */
+                bdi->pages_per_s = 1 + ((u64)written) * MSEC_PER_SEC / deltams;
+                return;
+        }
+        bdi->pages_per_s = 1 +
+                (((u64)bdi->pages_per_s) * (WINDOW_MS - deltams) +
+                 ((u64)written) * MSEC_PER_SEC) / WINDOW_MS;
+}
+
+void distribute_page_completions(struct work_struct *work)
+{
+        struct backing_dev_info *bdi =
+                container_of(work, struct backing_dev_info, balance_work.work);
+        unsigned long written = bdi_stat_sum(bdi, BDI_WRITTEN);
+        unsigned long pages_per_waiter;
+        unsigned long cur_time = jiffies;
+        unsigned long min_pages = ULONG_MAX;
+        struct balance_waiter *waiter, *tmpw;
+        struct dirty_limit_state st;
+        int dirty_exceeded;
+
+        trace_writeback_distribute_page_completions(bdi, written);
+        dirty_exceeded = check_dirty_limits(bdi, &st);
+        if (dirty_exceeded < DIRTY_MAY_EXCEED_LIMIT) {
+                /* Wakeup everybody */
+                trace_writeback_distribute_page_completions_wakeall(bdi);
+                spin_lock(&bdi->balance_lock);
+                list_for_each_entry_safe(
+                                waiter, tmpw, &bdi->balance_list, bw_list)
+                        balance_waiter_done(bdi, waiter);
+                update_bdi_throughput(bdi, written, cur_time);
+                spin_unlock(&bdi->balance_lock);
+                return;
+        }
+
+        spin_lock(&bdi->balance_lock);
+        update_bdi_throughput(bdi, written, cur_time);
+        bdi->start_jiffies = cur_time;
+        /* Distribute pages equally among waiters */
+        while (!list_empty(&bdi->balance_list)) {
+                pages_per_waiter = (written - bdi->written_start) /
+                                   bdi->balance_waiters;
+                if (!pages_per_waiter)
+                        break;
+                list_for_each_entry_safe(
+                                waiter, tmpw, &bdi->balance_list, bw_list) {
+                        unsigned long delta = min(pages_per_waiter,
+                                                  waiter->bw_wait_pages);
+
+                        waiter->bw_wait_pages -= delta;
+                        bdi->written_start += delta;
+                        if (waiter->bw_wait_pages == 0)
+                                balance_waiter_done(bdi, waiter);
                 }
-                trace_wbc_balance_dirty_wait(&wbc, bdi);
-                __set_current_state(TASK_UNINTERRUPTIBLE);
-                io_schedule_timeout(pause);
+        }
+        /*
+         * Wake tasks that might have gotten below their limits and compute
+         * the number of pages we wait for
+         */
+        list_for_each_entry_safe(waiter, tmpw, &bdi->balance_list, bw_list) {
+                if (dirty_exceeded == DIRTY_MAY_EXCEED_LIMIT &&
+                    !bdi_task_limit_exceeded(&st, waiter->bw_task))
+                        balance_waiter_done(bdi, waiter);
+                else if (waiter->bw_wait_pages < min_pages)
+                        min_pages = waiter->bw_wait_pages;
+        }
+        /* More page completions needed? */
+        if (!list_empty(&bdi->balance_list)) {
+                schedule_delayed_work(&bdi->balance_work,
+                                      compute_distribute_time(bdi, min_pages));
+        }
+        spin_unlock(&bdi->balance_lock);
+}
+
+/*
+ * balance_dirty_pages() must be called by processes which are generating dirty
+ * data.  It looks at the number of dirty pages in the machine and will force
+ * the caller to perform writeback if the system is over `vm_dirty_ratio'.
+ * If we're over `background_thresh' then the writeback threads are woken to
+ * perform some writeout.
+ */
+static void balance_dirty_pages(struct address_space *mapping,
+                                unsigned long write_chunk)
+{
+        struct backing_dev_info *bdi = mapping->backing_dev_info;
+        struct balance_waiter bw;
+        struct dirty_limit_state st;
+        int dirty_exceeded = check_dirty_limits(bdi, &st);
+
+        if (dirty_exceeded < DIRTY_MAY_EXCEED_LIMIT ||
+            (dirty_exceeded == DIRTY_MAY_EXCEED_LIMIT &&
+             !bdi_task_limit_exceeded(&st, current))) {
+                if (bdi->dirty_exceeded &&
+                    dirty_exceeded < DIRTY_MAY_EXCEED_LIMIT)
+                        bdi->dirty_exceeded = 0;
                 /*
-                 * Increase the delay for each loop, up to our previous
-                 * default of taking a 100ms nap.
+                 * In laptop mode, we wait until hitting the higher threshold
+                 * before starting background writeout, and then write out all
+                 * the way down to the lower threshold. So slow writers cause
+                 * minimal disk activity.
+                 *
+                 * In normal mode, we start background writeout at the lower
+                 * background_thresh, to keep the amount of dirty memory low.
                  */
-                pause <<= 1;
-                if (pause > HZ / 10)
-                        pause = HZ / 10;
+                if (!laptop_mode && dirty_exceeded == DIRTY_EXCEED_BACKGROUND)
+                        bdi_start_background_writeback(bdi);
+                return;
         }
 
-        if (!dirty_exceeded && bdi->dirty_exceeded)
-                bdi->dirty_exceeded = 0;
+        if (!bdi->dirty_exceeded)
+                bdi->dirty_exceeded = 1;
 
-        if (writeback_in_progress(bdi))
-                return;
-
+        trace_writeback_balance_dirty_pages_waiting(bdi, write_chunk);
+        /* Kick flusher thread to start doing work if it isn't already */
+        bdi_start_background_writeback(bdi);
+        bw.bw_wait_pages = write_chunk;
+        bw.bw_task = current;
+        spin_lock(&bdi->balance_lock);
         /*
-         * In laptop mode, we wait until hitting the higher threshold before
-         * starting background writeout, and then write out all the way down
-         * to the lower threshold. So slow writers cause minimal disk activity.
-         *
-         * In normal mode, we start background writeout at the lower
-         * background_thresh, to keep the amount of dirty memory low.
+         * Add work to the balance list, from now on the structure is handled
+         * by distribute_page_completions()
+         */
+        list_add_tail(&bw.bw_list, &bdi->balance_list);
+        bdi->balance_waiters++;
+        /*
+         * First item? Need to schedule distribution of IO completions among
+         * items on balance_list
+         */
+        if (bdi->balance_waiters == 1) {
+                bdi->written_start = bdi_stat_sum(bdi, BDI_WRITTEN);
+                bdi->start_jiffies = jiffies;
+                schedule_delayed_work(&bdi->balance_work,
+                                      compute_distribute_time(bdi, write_chunk));
+        }
+        /*
+         * Setting task state must happen inside balance_lock to avoid races
+         * with distribution function waking us.
+         */
+        __set_current_state(TASK_UNINTERRUPTIBLE);
+        spin_unlock(&bdi->balance_lock);
+        /* Wait for pages to get written */
+        schedule();
+        /*
+         * Enough page completions should have happened by now and we should
+         * have been removed from the list
         */
-        if ((laptop_mode && pages_written) ||
-            (!laptop_mode && (nr_reclaimable > background_thresh)))
-                bdi_start_background_writeback(bdi);
+        WARN_ON(!list_empty(&bw.bw_list));
+        trace_writeback_balance_dirty_pages_woken(bdi);
 }
 
 void set_page_dirty_balance(struct page *page, int page_mkwrite)
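
A note on the arithmetic above, with a small model that is not part of the patch: update_bdi_throughput() blends the pages completed during the last 'deltams' milliseconds into an imaginary WINDOW_MS-long window, and compute_distribute_time() turns the pages still owed to the queued waiters into a sleep period clamped to (HZ/50..HZ/4). The sketch below redoes both calculations in plain user-space C so they can be checked by hand; HZ = 250, the bdi_model struct, the function names update_throughput()/distribute_time() and main() are stand-ins invented for the example, only the formulas mirror the patch.

/*
 * User-space sketch of the throughput estimate and sleep-time calculation.
 * Not kernel code: HZ and the backing_dev_info fields are stubbed out.
 * Build with: cc -o balance-model balance-model.c
 */
#include <stdio.h>
#include <stdint.h>

#define MSEC_PER_SEC    1000UL
#define HZ              250UL           /* assumed tick rate for the example */
#define WINDOW_MS       10000UL         /* same window as the patch */

struct bdi_model {
        unsigned long pages_per_s;      /* estimated throughput */
        unsigned long balance_waiters;  /* waiters on balance_list */
};

/* Mirrors update_bdi_throughput(): blend 'written' pages over 'deltams' ms */
static void update_throughput(struct bdi_model *bdi, unsigned long written,
                              unsigned long deltams)
{
        if (deltams > WINDOW_MS) {
                bdi->pages_per_s = 1 +
                        (uint64_t)written * MSEC_PER_SEC / deltams;
                return;
        }
        bdi->pages_per_s = 1 +
                ((uint64_t)bdi->pages_per_s * (WINDOW_MS - deltams) +
                 (uint64_t)written * MSEC_PER_SEC) / WINDOW_MS;
}

/* Mirrors compute_distribute_time(): jiffies to sleep before rescanning */
static unsigned long distribute_time(struct bdi_model *bdi,
                                     unsigned long min_pages)
{
        uint64_t ms;
        unsigned long nap;

        min_pages *= bdi->balance_waiters;  /* round robin: each waiter needs min_pages */
        ms = (uint64_t)min_pages * MSEC_PER_SEC / bdi->pages_per_s;
        nap = ms * HZ / MSEC_PER_SEC;       /* approximates msecs_to_jiffies() */
        if (nap < HZ / 50)                  /* don't wake too often */
                nap = HZ / 50;
        if (nap > HZ / 4)                   /* recheck dirty limits regularly */
                nap = HZ / 4;
        return nap;
}

int main(void)
{
        struct bdi_model bdi = { .pages_per_s = 1, .balance_waiters = 4 };

        /*
         * 2000 pages completed in 500 ms: the 10 s window damps the estimate
         * to 1 + 2000 * 1000 / 10000 = 201 pages/s instead of jumping to the
         * instantaneous 4000 pages/s.
         */
        update_throughput(&bdi, 2000, 500);
        printf("pages_per_s = %lu\n", bdi.pages_per_s);         /* 201 */

        /*
         * Four waiters, smallest request 384 pages: the raw sleep would be
         * about 7.6 s, so the HZ/4 cap (62 jiffies here) wins.
         */
        printf("nap = %lu jiffies\n", distribute_time(&bdi, 384));
        return 0;
}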