summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block/blk-core.c26
-rw-r--r--block/blk-merge.c20
-rw-r--r--block/blk-settings.c2
-rw-r--r--block/blk.h1
-rw-r--r--block/elevator.c16
-rw-r--r--block/genhd.c3
-rw-r--r--drivers/ide/ide-io.c4
-rw-r--r--drivers/s390/scsi/zfcp_def.h2
-rw-r--r--drivers/s390/scsi/zfcp_fsf.c34
-rw-r--r--drivers/s390/scsi/zfcp_fsf.h12
-rw-r--r--drivers/s390/scsi/zfcp_qdio.c1
-rw-r--r--fs/block_dev.c2
-rw-r--r--fs/partitions/check.c10
-rw-r--r--include/linux/bio.h7
-rw-r--r--include/linux/blkdev.h1
-rw-r--r--include/linux/blktrace_api.h32
-rw-r--r--include/linux/interrupt.h21
-rw-r--r--include/linux/smp.h4
-rw-r--r--kernel/softirq.c129
-rw-r--r--lib/Kconfig.debug5
20 files changed, 305 insertions, 27 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 9e79a485e4f3..c3df30cfb3fc 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -257,7 +257,6 @@ void __generic_unplug_device(struct request_queue *q)
q->request_fn(q);
}
-EXPORT_SYMBOL(__generic_unplug_device);
/**
* generic_unplug_device - fire a request queue
@@ -325,6 +324,9 @@ EXPORT_SYMBOL(blk_unplug);
static void blk_invoke_request_fn(struct request_queue *q)
{
+ if (unlikely(blk_queue_stopped(q)))
+ return;
+
/*
* one level of recursion is ok and is much faster than kicking
* the unplug handling
@@ -399,8 +401,13 @@ void blk_sync_queue(struct request_queue *q)
EXPORT_SYMBOL(blk_sync_queue);
/**
- * blk_run_queue - run a single device queue
+ * __blk_run_queue - run a single device queue
* @q: The queue to run
+ *
+ * Description:
+ * See @blk_run_queue. This variant must be called with the queue lock
+ * held and interrupts disabled.
+ *
*/
void __blk_run_queue(struct request_queue *q)
{
@@ -418,6 +425,12 @@ EXPORT_SYMBOL(__blk_run_queue);
/**
* blk_run_queue - run a single device queue
* @q: The queue to run
+ *
+ * Description:
+ * Invoke request handling on this queue, if it has pending work to do.
+ * May be used to restart queueing when a request has completed. Also
+ * See @blk_start_queueing.
+ *
*/
void blk_run_queue(struct request_queue *q)
{
@@ -501,6 +514,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
init_timer(&q->unplug_timer);
setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
INIT_LIST_HEAD(&q->timeout_list);
+ INIT_WORK(&q->unplug_work, blk_unplug_work);
kobject_init(&q->kobj, &blk_queue_ktype);
@@ -884,7 +898,8 @@ EXPORT_SYMBOL(blk_get_request);
*
* This is basically a helper to remove the need to know whether a queue
* is plugged or not if someone just wants to initiate dispatch of requests
- * for this queue.
+ * for this queue. Should be used to start queueing on a device outside
+ * of ->request_fn() context. Also see @blk_run_queue.
*
* The queue lock must be held with interrupts disabled.
*/
@@ -1003,8 +1018,9 @@ static void part_round_stats_single(int cpu, struct hd_struct *part,
}
/**
- * part_round_stats() - Round off the performance stats on a struct
- * disk_stats.
+ * part_round_stats() - Round off the performance stats on a struct disk_stats.
+ * @cpu: cpu number for stats access
+ * @part: target partition
*
* The average IO queue length and utilisation statistics are maintained
* by observing the current state of the queue length and the amount of
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 908d3e11ac52..8681cd6f9911 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -77,12 +77,20 @@ void blk_recalc_rq_segments(struct request *rq)
continue;
}
new_segment:
+ if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size)
+ rq->bio->bi_seg_front_size = seg_size;
+
nr_phys_segs++;
bvprv = bv;
seg_size = bv->bv_len;
highprv = high;
}
+ if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size)
+ rq->bio->bi_seg_front_size = seg_size;
+ if (seg_size > rq->biotail->bi_seg_back_size)
+ rq->biotail->bi_seg_back_size = seg_size;
+
rq->nr_phys_segments = nr_phys_segs;
}
@@ -106,7 +114,8 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
return 0;
- if (bio->bi_size + nxt->bi_size > q->max_segment_size)
+ if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
+ q->max_segment_size)
return 0;
if (!bio_has_data(bio))
@@ -309,6 +318,8 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
struct request *next)
{
int total_phys_segments;
+ unsigned int seg_size =
+ req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size;
/*
* First check if the either of the requests are re-queued
@@ -324,8 +335,13 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
return 0;
total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
- if (blk_phys_contig_segment(q, req->biotail, next->bio))
+ if (blk_phys_contig_segment(q, req->biotail, next->bio)) {
+ if (req->nr_phys_segments == 1)
+ req->bio->bi_seg_front_size = seg_size;
+ if (next->nr_phys_segments == 1)
+ next->biotail->bi_seg_back_size = seg_size;
total_phys_segments--;
+ }
if (total_phys_segments > q->max_phys_segments)
return 0;
diff --git a/block/blk-settings.c b/block/blk-settings.c
index b21dcdb64151..41392fbe19ff 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -141,8 +141,6 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
if (q->unplug_delay == 0)
q->unplug_delay = 1;
- INIT_WORK(&q->unplug_work, blk_unplug_work);
-
q->unplug_timer.function = blk_unplug_timeout;
q->unplug_timer.data = (unsigned long)q;
diff --git a/block/blk.h b/block/blk.h
index e5c579769963..d2e49af90db5 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -20,6 +20,7 @@ void blk_unplug_timeout(unsigned long data);
void blk_rq_timed_out_timer(unsigned long data);
void blk_delete_timer(struct request *);
void blk_add_timer(struct request *);
+void __generic_unplug_device(struct request_queue *);
/*
* Internal atomic flags for request handling
diff --git a/block/elevator.c b/block/elevator.c
index 04518921db31..59173a69ebdf 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -612,7 +612,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
* processing.
*/
blk_remove_plug(q);
- q->request_fn(q);
+ blk_start_queueing(q);
break;
case ELEVATOR_INSERT_SORT:
@@ -950,7 +950,7 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
- q->request_fn(q);
+ blk_start_queueing(q);
}
}
}
@@ -1109,8 +1109,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
elv_drain_elevator(q);
while (q->rq.elvpriv) {
- blk_remove_plug(q);
- q->request_fn(q);
+ blk_start_queueing(q);
spin_unlock_irq(q->queue_lock);
msleep(10);
spin_lock_irq(q->queue_lock);
@@ -1166,15 +1165,10 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
size_t count)
{
char elevator_name[ELV_NAME_MAX];
- size_t len;
struct elevator_type *e;
- elevator_name[sizeof(elevator_name) - 1] = '\0';
- strncpy(elevator_name, name, sizeof(elevator_name) - 1);
- len = strlen(elevator_name);
-
- if (len && elevator_name[len - 1] == '\n')
- elevator_name[len - 1] = '\0';
+ strlcpy(elevator_name, name, sizeof(elevator_name));
+ strstrip(elevator_name);
e = elevator_get(elevator_name);
if (!e) {
diff --git a/block/genhd.c b/block/genhd.c
index 4cd3433c99ac..646e1d2507c7 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -358,7 +358,6 @@ static int blk_mangle_minor(int minor)
/**
* blk_alloc_devt - allocate a dev_t for a partition
* @part: partition to allocate dev_t for
- * @gfp_mask: memory allocation flag
* @devt: out parameter for resulting dev_t
*
* Allocate a dev_t for block device.
@@ -535,7 +534,7 @@ void unlink_gendisk(struct gendisk *disk)
/**
* get_gendisk - get partitioning information for a given device
* @devt: device to get partitioning information for
- * @part: returned partition index
+ * @partno: returned partition index
*
* This function gets the structure containing partitioning
* information for the given device @devt.
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 77c6eaeacefa..7162d67562af 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -1493,8 +1493,8 @@ void ide_do_drive_cmd(ide_drive_t *drive, struct request *rq)
spin_lock_irqsave(&ide_lock, flags);
hwgroup->rq = NULL;
- __elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 1);
- __generic_unplug_device(drive->queue);
+ __elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0);
+ blk_start_queueing(drive->queue);
spin_unlock_irqrestore(&ide_lock, flags);
}
diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h
index 8a13071c444c..9ce4c75bd190 100644
--- a/drivers/s390/scsi/zfcp_def.h
+++ b/drivers/s390/scsi/zfcp_def.h
@@ -583,6 +583,8 @@ struct zfcp_fsf_req {
unsigned long long issued; /* request sent time (STCK) */
struct zfcp_unit *unit;
void (*handler)(struct zfcp_fsf_req *);
+ u16 qdio_outb_usage;/* usage of outbound queue */
+ u16 qdio_inb_usage; /* usage of inbound queue */
};
/* driver data */
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index 739356a5c123..5ae1d497e5ed 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -6,6 +6,7 @@
* Copyright IBM Corporation 2002, 2008
*/
+#include <linux/blktrace_api.h>
#include "zfcp_ext.h"
static void zfcp_fsf_request_timeout_handler(unsigned long data)
@@ -777,6 +778,7 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *req)
list_add_tail(&req->list, &adapter->req_list[idx]);
spin_unlock(&adapter->req_list_lock);
+ req->qdio_outb_usage = atomic_read(&req_q->count);
req->issued = get_clock();
if (zfcp_qdio_send(req)) {
/* Queues are down..... */
@@ -2082,6 +2084,36 @@ static void zfcp_fsf_req_latency(struct zfcp_fsf_req *req)
spin_unlock_irqrestore(&unit->latencies.lock, flags);
}
+#ifdef CONFIG_BLK_DEV_IO_TRACE
+static void zfcp_fsf_trace_latency(struct zfcp_fsf_req *fsf_req)
+{
+ struct fsf_qual_latency_info *lat_inf;
+ struct scsi_cmnd *scsi_cmnd = (struct scsi_cmnd *)fsf_req->data;
+ struct request *req = scsi_cmnd->request;
+ struct zfcp_blk_drv_data trace;
+ int ticks = fsf_req->adapter->timer_ticks;
+
+ trace.flags = 0;
+ trace.magic = ZFCP_BLK_DRV_DATA_MAGIC;
+ if (fsf_req->adapter->adapter_features & FSF_FEATURE_MEASUREMENT_DATA) {
+ trace.flags |= ZFCP_BLK_LAT_VALID;
+ lat_inf = &fsf_req->qtcb->prefix.prot_status_qual.latency_info;
+ trace.channel_lat = lat_inf->channel_lat * ticks;
+ trace.fabric_lat = lat_inf->fabric_lat * ticks;
+ }
+ if (fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR)
+ trace.flags |= ZFCP_BLK_REQ_ERROR;
+ trace.inb_usage = fsf_req->qdio_inb_usage;
+ trace.outb_usage = fsf_req->qdio_outb_usage;
+
+ blk_add_driver_data(req->q, req, &trace, sizeof(trace));
+}
+#else
+static inline void zfcp_fsf_trace_latency(struct zfcp_fsf_req *fsf_req)
+{
+}
+#endif
+
static void zfcp_fsf_send_fcp_command_task_handler(struct zfcp_fsf_req *req)
{
struct scsi_cmnd *scpnt = req->data;
@@ -2114,6 +2146,8 @@ static void zfcp_fsf_send_fcp_command_task_handler(struct zfcp_fsf_req *req)
if (req->adapter->adapter_features & FSF_FEATURE_MEASUREMENT_DATA)
zfcp_fsf_req_latency(req);
+ zfcp_fsf_trace_latency(req);
+
if (unlikely(fcp_rsp_iu->validity.bits.fcp_rsp_len_valid)) {
if (fcp_rsp_info[3] == RSP_CODE_GOOD)
set_host_byte(scpnt, DID_OK);
diff --git a/drivers/s390/scsi/zfcp_fsf.h b/drivers/s390/scsi/zfcp_fsf.h
index fd3a88777ac8..fa2a31780611 100644
--- a/drivers/s390/scsi/zfcp_fsf.h
+++ b/drivers/s390/scsi/zfcp_fsf.h
@@ -439,4 +439,16 @@ struct fsf_qtcb {
u8 log[FSF_QTCB_LOG_SIZE];
} __attribute__ ((packed));
+struct zfcp_blk_drv_data {
+#define ZFCP_BLK_DRV_DATA_MAGIC 0x1
+ u32 magic;
+#define ZFCP_BLK_LAT_VALID 0x1
+#define ZFCP_BLK_REQ_ERROR 0x2
+ u16 flags;
+ u8 inb_usage;
+ u8 outb_usage;
+ u64 channel_lat;
+ u64 fabric_lat;
+} __attribute__ ((packed));
+
#endif /* FSF_H */
diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c
index 3e05080e62d4..664752f90b20 100644
--- a/drivers/s390/scsi/zfcp_qdio.c
+++ b/drivers/s390/scsi/zfcp_qdio.c
@@ -115,6 +115,7 @@ static void zfcp_qdio_reqid_check(struct zfcp_adapter *adapter,
spin_unlock_irqrestore(&adapter->req_list_lock, flags);
fsf_req->sbal_response = sbal_idx;
+ fsf_req->qdio_inb_usage = atomic_read(&adapter->resp_q.count);
zfcp_fsf_req_complete(fsf_req);
}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index d84f0469a016..218408eed1bb 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1262,7 +1262,7 @@ EXPORT_SYMBOL(ioctl_by_bdev);
/**
* lookup_bdev - lookup a struct block_device by name
- * @pathname: special file representing the block device
+ * @path: special file representing the block device
*
* Get a reference to the blockdevice at @pathname in the current
* namespace if possible and return it. Return ERR_PTR(error)
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index fbeb2f372a93..cfb0c80690aa 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -195,6 +195,14 @@ check_partition(struct gendisk *hd, struct block_device *bdev)
return ERR_PTR(res);
}
+static ssize_t part_partition_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hd_struct *p = dev_to_part(dev);
+
+ return sprintf(buf, "%d\n", p->partno);
+}
+
static ssize_t part_start_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -260,6 +268,7 @@ ssize_t part_fail_store(struct device *dev,
}
#endif
+static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
@@ -269,6 +278,7 @@ static struct device_attribute dev_attr_fail =
#endif
static struct attribute *part_attrs[] = {
+ &dev_attr_partition.attr,
&dev_attr_start.attr,
&dev_attr_size.attr,
&dev_attr_stat.attr,
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 1beda208cbfb..1c91a176b9ae 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -79,6 +79,13 @@ struct bio {
unsigned int bi_size; /* residual I/O count */
+ /*
+ * To keep track of the max segment size, we account for the
+ * sizes of the first and last mergeable segments in this bio.
+ */
+ unsigned int bi_seg_front_size;
+ unsigned int bi_seg_back_size;
+
unsigned int bi_max_vecs; /* max bvl_vecs we can hold */
unsigned int bi_comp_cpu; /* completion CPU */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f3491d225268..b4fe68fe3a57 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -865,7 +865,6 @@ extern void blk_ordered_complete_seq(struct request_queue *, unsigned, int);
extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
extern void blk_dump_rq_flags(struct request *, char *);
extern void generic_unplug_device(struct request_queue *);
-extern void __generic_unplug_device(struct request_queue *);
extern long nr_blockdev_pages(void);
int blk_get_queue(struct request_queue *);
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 3a31eb506164..bdf505d33e77 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -24,6 +24,7 @@ enum blktrace_cat {
BLK_TC_AHEAD = 1 << 11, /* readahead */
BLK_TC_META = 1 << 12, /* metadata */
BLK_TC_DISCARD = 1 << 13, /* discard requests */
+ BLK_TC_DRV_DATA = 1 << 14, /* binary per-driver data */
BLK_TC_END = 1 << 15, /* only 16-bits, reminder */
};
@@ -51,6 +52,7 @@ enum blktrace_act {
__BLK_TA_BOUNCE, /* bio was bounced */
__BLK_TA_REMAP, /* bio was remapped */
__BLK_TA_ABORT, /* request aborted */
+ __BLK_TA_DRV_DATA, /* driver-specific binary data */
};
/*
@@ -82,6 +84,7 @@ enum blktrace_notify {
#define BLK_TA_BOUNCE (__BLK_TA_BOUNCE)
#define BLK_TA_REMAP (__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE))
#define BLK_TA_ABORT (__BLK_TA_ABORT | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_DRV_DATA (__BLK_TA_DRV_DATA | BLK_TC_ACT(BLK_TC_DRV_DATA))
#define BLK_TN_PROCESS (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY))
#define BLK_TN_TIMESTAMP (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY))
@@ -317,6 +320,34 @@ static inline void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
__blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
}
+/**
+ * blk_add_driver_data - Add binary message with driver-specific data
+ * @q: queue the io is for
+ * @rq: io request
+ * @data: driver-specific data
+ * @len: length of driver-specific data
+ *
+ * Description:
+ * Some drivers might want to write driver-specific data per request.
+ *
+ **/
+static inline void blk_add_driver_data(struct request_queue *q,
+ struct request *rq,
+ void *data, size_t len)
+{
+ struct blk_trace *bt = q->blk_trace;
+
+ if (likely(!bt))
+ return;
+
+ if (blk_pc_request(rq))
+ __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
+ rq->errors, len, data);
+ else
+ __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
+ 0, BLK_TA_DRV_DATA, rq->errors, len, data);
+}
+
extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
char __user *arg);
extern int blk_trace_startstop(struct request_queue *q, int start);
@@ -330,6 +361,7 @@ extern int blk_trace_remove(struct request_queue *q);
#define blk_add_trace_generic(q, rq, rw, what) do { } while (0)
#define blk_add_trace_pdu_int(q, what, bio, pdu) do { } while (0)
#define blk_add_trace_remap(q, bio, dev, f, t) do {} while (0)
+#define blk_add_driver_data(q, rq, data, len) do {} while (0)
#define do_blk_trace_setup(q, name, dev, buts) (-ENOTTY)
#define blk_trace_setup(q, name, dev, arg) (-ENOTTY)
#define blk_trace_startstop(q, start) (-ENOTTY)
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 54b3623434ec..35a61dc60d51 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -11,6 +11,8 @@
#include <linux/hardirq.h>
#include <linux/sched.h>
#include <linux/irqflags.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
#include <asm/atomic.h>
#include <asm/ptrace.h>
#include <asm/system.h>
@@ -273,6 +275,25 @@ extern void softirq_init(void);
extern void raise_softirq_irqoff(unsigned int nr);
extern void raise_softirq(unsigned int nr);
+/* This is the worklist that queues up per-cpu softirq work.
+ *
+ * send_remote_sendirq() adds work to these lists, and
+ * the softirq handler itself dequeues from them. The queues
+ * are protected by disabling local cpu interrupts and they must
+ * only be accessed by the local cpu that they are for.
+ */
+DECLARE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
+
+/* Try to send a softirq to a remote cpu. If this cannot be done, the
+ * work will be queued to the local cpu.
+ */
+extern void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq);
+
+/* Like send_remote_softirq(), but the caller must disable local cpu interrupts
+ * and compute the current cpu, passed in as 'this_cpu'.
+ */
+extern void __send_remote_softirq(struct call_single_data *cp, int cpu,
+ int this_cpu, int softirq);
/* Tasklets --- multithreaded analogue of BHs.
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 66484d4a8459..2e4d58b26c06 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -7,6 +7,7 @@
*/
#include <linux/errno.h>
+#include <linux/types.h>
#include <linux/list.h>
#include <linux/cpumask.h>
@@ -16,7 +17,8 @@ struct call_single_data {
struct list_head list;
void (*func) (void *info);
void *info;
- unsigned int flags;
+ u16 flags;
+ u16 priv;
};
#ifdef CONFIG_SMP
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 37d67aa2d56f..83ba21a13bd4 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -6,6 +6,8 @@
* Distribute under GPLv2.
*
* Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
+ *
+ * Remote softirq infrastructure is by Jens Axboe.
*/
#include <linux/module.h>
@@ -474,17 +476,144 @@ void tasklet_kill(struct tasklet_struct *t)
EXPORT_SYMBOL(tasklet_kill);
+DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
+EXPORT_PER_CPU_SYMBOL(softirq_work_list);
+
+static void __local_trigger(struct call_single_data *cp, int softirq)
+{
+ struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);
+
+ list_add_tail(&cp->list, head);
+
+ /* Trigger the softirq only if the list was previously empty. */
+ if (head->next == &cp->list)
+ raise_softirq_irqoff(softirq);
+}
+
+#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
+static void remote_softirq_receive(void *data)
+{
+ struct call_single_data *cp = data;
+ unsigned long flags;
+ int softirq;
+
+ softirq = cp->priv;
+
+ local_irq_save(flags);
+ __local_trigger(cp, softirq);
+ local_irq_restore(flags);
+}
+
+static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
+{
+ if (cpu_online(cpu)) {
+ cp->func = remote_softirq_receive;
+ cp->info = cp;
+ cp->flags = 0;
+ cp->priv = softirq;
+
+ __smp_call_function_single(cpu, cp);
+ return 0;
+ }
+ return 1;
+}
+#else /* CONFIG_USE_GENERIC_SMP_HELPERS */
+static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
+{
+ return 1;
+}
+#endif
+
+/**
+ * __send_remote_softirq - try to schedule softirq work on a remote cpu
+ * @cp: private SMP call function data area
+ * @cpu: the remote cpu
+ * @this_cpu: the currently executing cpu
+ * @softirq: the softirq for the work
+ *
+ * Attempt to schedule softirq work on a remote cpu. If this cannot be
+ * done, the work is instead queued up on the local cpu.
+ *
+ * Interrupts must be disabled.
+ */
+void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
+{
+ if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq))
+ __local_trigger(cp, softirq);
+}
+EXPORT_SYMBOL(__send_remote_softirq);
+
+/**
+ * send_remote_softirq - try to schedule softirq work on a remote cpu
+ * @cp: private SMP call function data area
+ * @cpu: the remote cpu
+ * @softirq: the softirq for the work
+ *
+ * Like __send_remote_softirq except that disabling interrupts and
+ * computing the current cpu is done for the caller.
+ */
+void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
+{
+ unsigned long flags;
+ int this_cpu;
+
+ local_irq_save(flags);
+ this_cpu = smp_processor_id();
+ __send_remote_softirq(cp, cpu, this_cpu, softirq);
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL(send_remote_softirq);
+
+static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ /*
+ * If a CPU goes away, splice its entries to the current CPU
+ * and trigger a run of the softirq
+ */
+ if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+ int cpu = (unsigned long) hcpu;
+ int i;
+
+ local_irq_disable();
+ for (i = 0; i < NR_SOFTIRQS; i++) {
+ struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
+ struct list_head *local_head;
+
+ if (list_empty(head))
+ continue;
+
+ local_head = &__get_cpu_var(softirq_work_list[i]);
+ list_splice_init(head, local_head);
+ raise_softirq_irqoff(i);
+ }
+ local_irq_enable();
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = {
+ .notifier_call = remote_softirq_cpu_notify,
+};
+
void __init softirq_init(void)
{
int cpu;
for_each_possible_cpu(cpu) {
+ int i;
+
per_cpu(tasklet_vec, cpu).tail =
&per_cpu(tasklet_vec, cpu).head;
per_cpu(tasklet_hi_vec, cpu).tail =
&per_cpu(tasklet_hi_vec, cpu).head;
+ for (i = 0; i < NR_SOFTIRQS; i++)
+ INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
}
+ register_hotcpu_notifier(&remote_softirq_cpu_notifier);
+
open_softirq(TASKLET_SOFTIRQ, tasklet_action);
open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 31d784dd80d0..b0f239e443bc 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -652,6 +652,11 @@ config DEBUG_BLOCK_EXT_DEVT
depends on BLOCK
default n
help
+ BIG FAT WARNING: ENABLING THIS OPTION MIGHT BREAK BOOTING ON
+ SOME DISTRIBUTIONS. DO NOT ENABLE THIS UNLESS YOU KNOW WHAT
+ YOU ARE DOING. Distros, please enable this and fix whatever
+ is broken.
+
Conventionally, block device numbers are allocated from
predetermined contiguous area. However, extended block area
may introduce non-contiguous block device numbers. This