Skip to content

Commit b69ffea

Browse files
longlimsft authored and martinkpetersen committed
scsi: storvsc: Prefer returning channel with the same CPU as on the I/O issuing CPU
When selecting an outgoing channel for I/O, storvsc tries to select a channel with a returning CPU that is not the same as issuing CPU. This worked well in the past, however it doesn't work well when the Hyper-V exposes a large number of channels (up to the number of all CPUs). Use a different CPU for returning channel is not efficient on Hyper-V. Change this behavior by preferring to the channel with the same CPU as the current I/O issuing CPU whenever possible. Tests have shown improvements in newer Hyper-V/Azure environment, and no regression with older Hyper-V/Azure environments. Tested-by: Raheel Abdul Faizy <[email protected]> Signed-off-by: Long Li <[email protected]> Message-Id: <[email protected]> Signed-off-by: Martin K. Petersen <[email protected]>
1 parent 558ae45 commit b69ffea

1 file changed

Lines changed: 45 additions & 51 deletions

File tree

drivers/scsi/storvsc_drv.c

Lines changed: 45 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1406,14 +1406,19 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
14061406
}
14071407

14081408
/*
1409-
* Our channel array is sparsley populated and we
1409+
* Our channel array could be sparsley populated and we
14101410
* initiated I/O on a processor/hw-q that does not
14111411
* currently have a designated channel. Fix this.
14121412
* The strategy is simple:
1413-
* I. Ensure NUMA locality
1414-
* II. Distribute evenly (best effort)
1413+
* I. Prefer the channel associated with the current CPU
1414+
* II. Ensure NUMA locality
1415+
* III. Distribute evenly (best effort)
14151416
*/
14161417

1418+
/* Prefer the channel on the I/O issuing processor/hw-q */
1419+
if (cpumask_test_cpu(q_num, &stor_device->alloced_cpus))
1420+
return stor_device->stor_chns[q_num];
1421+
14171422
node_mask = cpumask_of_node(cpu_to_node(q_num));
14181423

14191424
num_channels = 0;
@@ -1469,59 +1474,48 @@ static int storvsc_do_io(struct hv_device *device,
14691474
/* See storvsc_change_target_cpu(). */
14701475
outgoing_channel = READ_ONCE(stor_device->stor_chns[q_num]);
14711476
if (outgoing_channel != NULL) {
1472-
if (outgoing_channel->target_cpu == q_num) {
1473-
/*
1474-
* Ideally, we want to pick a different channel if
1475-
* available on the same NUMA node.
1476-
*/
1477-
node_mask = cpumask_of_node(cpu_to_node(q_num));
1478-
for_each_cpu_wrap(tgt_cpu,
1479-
&stor_device->alloced_cpus, q_num + 1) {
1480-
if (!cpumask_test_cpu(tgt_cpu, node_mask))
1481-
continue;
1482-
if (tgt_cpu == q_num)
1483-
continue;
1484-
channel = READ_ONCE(
1485-
stor_device->stor_chns[tgt_cpu]);
1486-
if (channel == NULL)
1487-
continue;
1488-
if (hv_get_avail_to_write_percent(
1489-
&channel->outbound)
1490-
> ring_avail_percent_lowater) {
1491-
outgoing_channel = channel;
1492-
goto found_channel;
1493-
}
1494-
}
1477+
if (hv_get_avail_to_write_percent(&outgoing_channel->outbound)
1478+
> ring_avail_percent_lowater)
1479+
goto found_channel;
14951480

1496-
/*
1497-
* All the other channels on the same NUMA node are
1498-
* busy. Try to use the channel on the current CPU
1499-
*/
1500-
if (hv_get_avail_to_write_percent(
1501-
&outgoing_channel->outbound)
1502-
> ring_avail_percent_lowater)
1481+
/*
1482+
* Channel is busy, try to find a channel on the same NUMA node
1483+
*/
1484+
node_mask = cpumask_of_node(cpu_to_node(q_num));
1485+
for_each_cpu_wrap(tgt_cpu, &stor_device->alloced_cpus,
1486+
q_num + 1) {
1487+
if (!cpumask_test_cpu(tgt_cpu, node_mask))
1488+
continue;
1489+
channel = READ_ONCE(stor_device->stor_chns[tgt_cpu]);
1490+
if (!channel)
1491+
continue;
1492+
if (hv_get_avail_to_write_percent(&channel->outbound)
1493+
> ring_avail_percent_lowater) {
1494+
outgoing_channel = channel;
15031495
goto found_channel;
1496+
}
1497+
}
15041498

1505-
/*
1506-
* If we reach here, all the channels on the current
1507-
* NUMA node are busy. Try to find a channel in
1508-
* other NUMA nodes
1509-
*/
1510-
for_each_cpu(tgt_cpu, &stor_device->alloced_cpus) {
1511-
if (cpumask_test_cpu(tgt_cpu, node_mask))
1512-
continue;
1513-
channel = READ_ONCE(
1514-
stor_device->stor_chns[tgt_cpu]);
1515-
if (channel == NULL)
1516-
continue;
1517-
if (hv_get_avail_to_write_percent(
1518-
&channel->outbound)
1519-
> ring_avail_percent_lowater) {
1520-
outgoing_channel = channel;
1521-
goto found_channel;
1522-
}
1499+
/*
1500+
* If we reach here, all the channels on the current
1501+
* NUMA node are busy. Try to find a channel in
1502+
* all NUMA nodes
1503+
*/
1504+
for_each_cpu_wrap(tgt_cpu, &stor_device->alloced_cpus,
1505+
q_num + 1) {
1506+
channel = READ_ONCE(stor_device->stor_chns[tgt_cpu]);
1507+
if (!channel)
1508+
continue;
1509+
if (hv_get_avail_to_write_percent(&channel->outbound)
1510+
> ring_avail_percent_lowater) {
1511+
outgoing_channel = channel;
1512+
goto found_channel;
15231513
}
15241514
}
1515+
/*
1516+
* If we reach here, all the channels are busy. Use the
1517+
* original channel found.
1518+
*/
15251519
} else {
15261520
spin_lock_irqsave(&stor_device->lock, flags);
15271521
outgoing_channel = stor_device->stor_chns[q_num];

0 commit comments

Comments
 (0)