|
7 | 7 | #include <linux/blkdev.h> |
8 | 8 | #include <linux/build_bug.h> |
9 | 9 | #include <linux/debugfs.h> |
| 10 | +#include <linux/percpu.h> |
10 | 11 |
|
11 | 12 | #include "blk.h" |
12 | 13 | #include "blk-mq.h" |
@@ -484,6 +485,54 @@ static int hctx_dispatch_busy_show(void *data, struct seq_file *m) |
484 | 485 | return 0; |
485 | 486 | } |
486 | 487 |
|
| 488 | +/** |
| 489 | + * hctx_wait_on_hw_tag_show - display hardware tag starvation count |
| 490 | + * @data: generic pointer to the associated hardware context (hctx) |
| 491 | + * @m: seq_file pointer for debugfs output formatting |
| 492 | + * |
| 493 | + * Prints the cumulative number of times a submitting context was forced |
| 494 | + * to block due to the exhaustion of physical hardware driver tags. |
| 495 | + * |
| 496 | + * Return: 0 on success. |
| 497 | + */ |
| 498 | +static int hctx_wait_on_hw_tag_show(void *data, struct seq_file *m) |
| 499 | +{ |
| 500 | + struct blk_mq_hw_ctx *hctx = data; |
| 501 | + unsigned long count = 0; |
| 502 | + int cpu; |
| 503 | + |
| 504 | + if (hctx->wait_on_hw_tag) { |
| 505 | + for_each_possible_cpu(cpu) |
| 506 | + count += *per_cpu_ptr(hctx->wait_on_hw_tag, cpu); |
| 507 | + } |
| 508 | + seq_printf(m, "%lu\n", count); |
| 509 | + return 0; |
| 510 | +} |
| 511 | + |
| 512 | +/** |
| 513 | + * hctx_wait_on_sched_tag_show - display scheduler tag starvation count |
| 514 | + * @data: generic pointer to the associated hardware context (hctx) |
| 515 | + * @m: seq_file pointer for debugfs output formatting |
| 516 | + * |
| 517 | + * Prints the cumulative number of times a submitting context was forced |
| 518 | + * to block due to the exhaustion of software scheduler tags. |
| 519 | + * |
| 520 | + * Return: 0 on success. |
| 521 | + */ |
| 522 | +static int hctx_wait_on_sched_tag_show(void *data, struct seq_file *m) |
| 523 | +{ |
| 524 | + struct blk_mq_hw_ctx *hctx = data; |
| 525 | + unsigned long count = 0; |
| 526 | + int cpu; |
| 527 | + |
| 528 | + if (hctx->wait_on_sched_tag) { |
| 529 | + for_each_possible_cpu(cpu) |
| 530 | + count += *per_cpu_ptr(hctx->wait_on_sched_tag, cpu); |
| 531 | + } |
| 532 | + seq_printf(m, "%lu\n", count); |
| 533 | + return 0; |
| 534 | +} |
| 535 | + |
487 | 536 | #define CTX_RQ_SEQ_OPS(name, type) \ |
488 | 537 | static void *ctx_##name##_rq_list_start(struct seq_file *m, loff_t *pos) \ |
489 | 538 | __acquires(&ctx->lock) \ |
@@ -599,6 +648,8 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = { |
599 | 648 | {"active", 0400, hctx_active_show}, |
600 | 649 | {"dispatch_busy", 0400, hctx_dispatch_busy_show}, |
601 | 650 | {"type", 0400, hctx_type_show}, |
| 651 | + {"wait_on_hw_tag", 0400, hctx_wait_on_hw_tag_show}, |
| 652 | + {"wait_on_sched_tag", 0400, hctx_wait_on_sched_tag_show}, |
602 | 653 | {}, |
603 | 654 | }; |
604 | 655 |
|
@@ -815,3 +866,61 @@ void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) |
815 | 866 | debugfs_remove_recursive(hctx->sched_debugfs_dir); |
816 | 867 | hctx->sched_debugfs_dir = NULL; |
817 | 868 | } |
| 869 | + |
| 870 | +/** |
| 871 | + * blk_mq_debugfs_alloc_hctx_stats - Allocate per-cpu starvation statistics |
| 872 | + * @hctx: hardware context associated with the tag allocation |
| 873 | + * @gfp: memory allocation flags |
| 874 | + * |
| 875 | + * Allocates the per-cpu memory for tracking hardware and scheduler tag |
| 876 | + * starvation. |
| 877 | + */ |
| 878 | +void blk_mq_debugfs_alloc_hctx_stats(struct blk_mq_hw_ctx *hctx, gfp_t gfp) |
| 879 | +{ |
| 880 | + if (!hctx->wait_on_hw_tag) |
| 881 | + hctx->wait_on_hw_tag = alloc_percpu_gfp(unsigned long, |
| 882 | + gfp); |
| 883 | + if (!hctx->wait_on_sched_tag) |
| 884 | + hctx->wait_on_sched_tag = alloc_percpu_gfp(unsigned long, |
| 885 | + gfp); |
| 886 | +} |
| 887 | + |
| 888 | +/** |
| 889 | + * blk_mq_debugfs_free_hctx_stats - Free per-cpu starvation statistics |
| 890 | + * @hctx: hardware context associated with the tag allocation |
| 891 | + * |
| 892 | + * Frees the per-cpu memory used for tracking hardware and scheduler tag |
| 893 | + * starvation. This must only be called during hardware queue teardown when |
| 894 | + * the queue is safely frozen and no active I/O submissions can race to |
| 895 | + * increment the statistics. |
| 896 | + */ |
| 897 | +void blk_mq_debugfs_free_hctx_stats(struct blk_mq_hw_ctx *hctx) |
| 898 | +{ |
| 899 | + free_percpu(hctx->wait_on_hw_tag); |
| 900 | + hctx->wait_on_hw_tag = NULL; |
| 901 | + free_percpu(hctx->wait_on_sched_tag); |
| 902 | + hctx->wait_on_sched_tag = NULL; |
| 903 | +} |
| 904 | + |
| 905 | +/** |
| 906 | + * blk_mq_debugfs_inc_wait_tags - increment the tag starvation counters |
| 907 | + * @hctx: hardware context associated with the tag allocation |
| 908 | + * @is_sched: true if the starved pool is the software scheduler |
| 909 | + * |
| 910 | + * Evaluates the exhausted tag pool and safely increments the appropriate |
| 911 | + * per-cpu debugfs starvation counter. |
| 912 | + * |
| 913 | + * Note: The per-cpu pointers are explicitly checked to prevent a NULL |
| 914 | + * pointer dereference in the event that the system was under heavy memory |
| 915 | + * pressure and the initial per-cpu allocation failed. |
| 916 | + */ |
| 917 | +void blk_mq_debugfs_inc_wait_tags(struct blk_mq_hw_ctx *hctx, |
| 918 | + bool is_sched) |
| 919 | +{ |
| 920 | + unsigned long __percpu *tags = is_sched ? |
| 921 | + READ_ONCE(hctx->wait_on_sched_tag) : |
| 922 | + READ_ONCE(hctx->wait_on_hw_tag); |
| 923 | + |
| 924 | + if (likely(tags)) |
| 925 | + this_cpu_inc(*tags); |
| 926 | +} |
0 commit comments